[ { "memory": 669653120, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 1, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 1, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 1, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 1, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 1, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 1, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 1, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 1, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 670308480, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 1, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 1, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 1, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 1, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 1, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 1, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 1, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 672733312, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 1, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 1, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 1, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 1, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 1, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 1, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 673388672, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 1, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 1, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 1, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 1, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 1, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 674044032, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 1, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 1, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 1, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 1, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 674699392, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 1, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 1, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 1, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 677124224, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 1, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 1, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 677779584, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 1, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 680204416, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 1, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 682629248, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 1, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 682842240, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 1, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 683497600, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 1, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 685922432, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 1, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 686135424, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 1, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 686348416, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 1, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 688773248, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 1, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 688986240, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 1, "layer.15.mlp.down_proj": 1 } }, { "memory": 691411072, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 1, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 693835904, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 1, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 694491264, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 1, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 696916096, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 1, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 699340928, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 1, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 699553920, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 1, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 701978752, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 1, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 704403584, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 1, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 705058944, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 1, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 705271936, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 1, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 705484928, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 1, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 706140288, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 1, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 706353280, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 1, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 706566272, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 1, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 708991104, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 1, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 711415936, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 1, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 713840768, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 1 } }, { "memory": 716265600, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 1, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 716920960, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 1, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 719345792, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 1, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 719558784, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 1, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 720214144, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 1, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 720427136, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 1, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 720640128, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 1, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 723064960, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 1, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 723277952, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 1, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 723490944, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 1, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 723703936, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 1, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 724359296, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 1, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 724572288, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 1, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 726997120, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 1, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 727652480, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 1, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 730077312, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 1, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 732502144, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 1, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 734926976, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 1, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 737351808, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 1, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 738007168, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 1, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 738662528, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 1, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 741087360, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 1, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 741300352, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 1, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 741513344, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 1, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 742168704, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 1, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 742824064, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 1, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 743037056, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 1, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 743250048, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 1, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 743905408, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 1, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 744118400, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 1, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 744773760, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 1, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 744986752, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 1, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 747411584, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 1, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 749836416, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 1, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 752261248, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 1, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 752916608, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 1, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 753129600, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 1, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 753784960, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 1, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 754440320, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 1, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 756865152, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 1, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 757520512, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 1, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 759945344, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 1, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 762370176, "layers": { "layer.0.self_attn.q_proj": 1, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 763025536, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 1, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 763680896, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 1, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 766105728, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 1, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 766318720, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 1, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 766531712, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 1, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 767187072, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 1, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 769611904, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 1, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 769824896, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 1, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 772249728, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 1, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 772905088, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 1, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 775329920, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 1, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 775985280, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 1, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 778410112, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 1, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 779065472, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 1, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 779720832, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 1, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 782145664, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 1, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 784570496, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 1, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 786995328, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 1, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 789420160, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 1, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 789633152, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 1, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 789846144, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 1, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 790059136, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 1, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 792483968, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 1, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 792696960, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 1, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 795121792, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 1, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 797546624, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 1, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 799971456, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 1, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 802396288, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 1, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 803051648, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 1, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 805476480, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 1, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 807901312, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 1, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 810326144, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 1, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 812750976, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 1, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 2, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 813406336, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 1, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 2, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 813619328, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 1, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 2, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 813832320, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 2, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 2, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 816257152, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 2, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 2, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 816912512, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 2, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 819337344, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 2, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 821762176, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 2, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 824187008, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 2, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 826611840, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 2, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 829036672, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 2, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 829249664, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 2, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 829905024, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 2, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 830560384, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 2, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 832985216, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 2, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 835410048, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 2, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 837834880, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 2, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 838047872, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 2, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 838703232, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 2, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 838916224, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 2, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 841341056, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 2, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 841996416, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 2, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 842651776, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 2, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 845076608, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 2, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 847501440, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 2, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 849926272, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 2, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 852351104, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 2, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 854775936, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 2, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 857200768, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 2, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 857856128, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 2, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 858511488, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 2, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 860936320, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 2, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 863361152, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 2, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 864016512, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 2, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 866441344, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 2, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 867096704, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 2, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 867752064, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 2, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 870176896, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 2, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 872601728, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 2, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 872814720, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 2, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 873027712, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 2, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 875452544, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 2, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 875665536, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 2, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 876320896, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 2, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 876976256, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 2, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 879401088, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 2, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 880056448, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 2, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 882481280, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 2, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 882694272, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 2, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 882907264, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 2, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 885332096, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 2, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 885545088, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 2, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 887969920, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 2, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 888182912, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 2, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 890607744, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 2, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 893032576, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 2, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 893687936, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 2, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 896112768, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 2, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 898537600, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 2, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 898750592, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 2, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 901175424, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 2, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 901388416, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 2, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 901601408, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 2, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 904026240, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 2, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 906451072, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 2, "layer.15.mlp.down_proj": 2 } }, { "memory": 908875904, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 2, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 909531264, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 2, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 909744256, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 2, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 909957248, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 2, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 912382080, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 2, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 914806912, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 2, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 915019904, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 2, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 915232896, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 2, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 915888256, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 2, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 916543616, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 2, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 916756608, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 2, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 916969600, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 2, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 919394432, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 2, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 920049792, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 2, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 920705152, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 2, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 921360512, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 2, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 923785344, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 2 } }, { "memory": 926210176, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 2, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 926865536, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 2, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 929290368, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 2, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 931715200, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 2, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 932370560, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 2, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 932583552, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 2, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 932796544, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 2, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 935221376, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 2, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 935434368, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 2, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 936089728, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 2, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 936302720, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 2, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 936958080, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 2, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 939382912, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 2, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 939595904, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 2, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 939808896, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 2, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 942233728, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 2, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 942889088, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 2, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 943544448, "layers": { "layer.0.self_attn.q_proj": 2, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 944199808, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 2, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 944855168, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 2, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 945510528, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 2, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 945723520, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 2, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 945936512, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 2, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 946591872, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 2, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 946804864, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 2, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 949229696, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 2, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 949442688, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 2, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 951867520, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 2, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 954292352, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 2, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 954505344, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 2, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 956930176, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 2, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 957143168, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 2, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 957356160, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 3, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 2, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 958011520, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 3, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 3, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 3, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 960436352, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 3, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 3, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 3, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 962861184, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 3, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 3, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 3, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 965286016, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 3, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 3, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 3, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 967710848, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 3, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 3, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 3, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 970135680, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 3, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 3, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 970348672, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 3, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 3, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 971004032, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 3, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 971659392, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 3, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 972314752, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 3, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 972970112, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 3, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 973625472, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 3, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 976050304, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 3, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 976263296, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 3, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 976918656, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 3, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 977574016, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 3, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 979998848, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 3, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 982423680, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 3, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 984848512, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 3, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 987273344, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 3, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 989698176, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 3, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 992123008, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 3, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 994547840, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 3, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 996972672, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 3, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 997185664, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 3, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 999610496, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 3, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1002035328, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 3, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1002248320, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 3, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1004673152, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 3, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1004886144, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 3, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1007310976, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 3, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1007966336, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 3, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1010391168, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 3, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1012816000, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 3, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1015240832, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 3, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1017665664, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 3, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1020090496, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 3, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1020745856, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 3, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1023170688, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 3, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1025595520, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 3, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1025808512, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 3, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1026463872, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 3, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1027119232, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 3, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1027774592, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 3, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1030199424, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 3, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1030854784, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 3, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1031510144, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 3, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1031723136, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 3, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1034147968, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 3, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1034803328, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 3, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1037228160, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 3, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1039652992, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 3, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1039865984, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 3 } }, { "memory": 1042290816, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 3, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1042503808, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 3, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1044928640, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 3, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1045141632, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 3, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1045796992, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 3, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1046452352, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 3, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1046665344, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 3, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1047320704, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 3, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1047976064, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 3, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1050400896, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 3, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1052825728, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 3, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1053038720, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 3, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1055463552, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 3, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1056118912, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 3, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1056331904, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 3, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1058756736, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 3, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1061181568, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 3, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1061394560, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 3, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1061607552, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 3, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1064032384, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 3, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1064245376, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 3, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1066670208, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 3, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1067325568, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 3, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1069750400, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 3, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1070405760, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 3, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1072830592, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 3, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1073043584, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 3, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1073698944, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 3, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1073911936, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 3, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1074124928, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 3, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1074337920, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 3, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1074993280, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 3, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1075206272, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 3, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1075861632, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 3, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1078286464, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 3, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1080711296, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 3, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1080924288, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 3, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1081137280, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 3, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1081792640, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 3, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1082005632, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 3, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1082660992, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 3, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1082873984, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 3, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1083529344, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 3, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1083742336, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 3, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1084397696, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 3, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1086822528, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 3, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1089247360, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 3, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1089902720, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 3, "layer.15.mlp.down_proj": 4 } }, { "memory": 1092327552, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 3, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1092540544, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 3, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1092753536, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 3, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1092966528, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 3, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1093179520, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 3, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1095604352, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 3, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1095817344, "layers": { "layer.0.self_attn.q_proj": 3, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1096472704, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 3, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1098897536, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 3, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1101322368, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 3, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1101977728, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 4, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 3, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1102190720, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 4, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 4, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1104615552, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 4, "layer.5.mlp.gate_proj": 4, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1107040384, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 4, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1107695744, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 4, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1110120576, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 4 } }, { "memory": 1112545408, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 4, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1114970240, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 4, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1117395072, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 4, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1119819904, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 4, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1122244736, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 4, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1122900096, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 4, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1125324928, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 4, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1125980288, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 4, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1126193280, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 4, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1126848640, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 4, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1129273472, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 4, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1129928832, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 4, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1132353664, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 4, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1134778496, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 4, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1137203328, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 4, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1137858688, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 4, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1140283520, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 4, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1142708352, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 4, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1145133184, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 4, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1147558016, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 4, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1149982848, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 4, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1152407680, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 4, "layer.15.mlp.down_proj": 5 } }, { "memory": 1154832512, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 4, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1155487872, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 4, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1157912704, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 4, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1160337536, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 4, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1162762368, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 4, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1162975360, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 4, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1163630720, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 4, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1166055552, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 4, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1168480384, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 4, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1170905216, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 4, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1173330048, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 4, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1175754880, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 4, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1176410240, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 4, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1178835072, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 4, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1181259904, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 4, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1181472896, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 4, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1183897728, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 4, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1186322560, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 4, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1186977920, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 4, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1187190912, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 4, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1187403904, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 4, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1189828736, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 4, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1192253568, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 4, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1194678400, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 4, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1197103232, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 4, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1199528064, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 4, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1199741056, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 4, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1202165888, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 4, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1204590720, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 4, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1207015552, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 4, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1207670912, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 4, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1210095744, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 4, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1210751104, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 4, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1211406464, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 4, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1211619456, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 4, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1211832448, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 4, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1212045440, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 4, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1214470272, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 4, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1216895104, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 4, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1217550464, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 4, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1217763456, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 4, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1217976448, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 4, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1218631808, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 4, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1221056640, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 4, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1221269632, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 4, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1221482624, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 4, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1221695616, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 4, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1222350976, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 4, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1222563968, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 4, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1223219328, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 4, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1223874688, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 4, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1226299520, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 4, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1226954880, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 4, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1227610240, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 4, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1228265600, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 4, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1230690432, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 4, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1233115264, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 4, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1233328256, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 4, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1233983616, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 4, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1234638976, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 4, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1237063808, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 4, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1237276800, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 4, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1237489792, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 4, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1237702784, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 4, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1238358144, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 4, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1238571136, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 4, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1238784128, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 4, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1239439488, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 4, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1240094848, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 4, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1240307840, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 4, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1240520832, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 4, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1240733824, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 4, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1241389184, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 4, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1242044544, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 4, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1242257536, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 4, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1242470528, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 4, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1243125888, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 4, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1243781248, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 4, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1243994240, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 4, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1244649600, "layers": { "layer.0.self_attn.q_proj": 4, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1245304960, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 4, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1245517952, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 4, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1245730944, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 4, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1245943936, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 4, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1246156928, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 4, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1246369920, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 5, "layer.15.mlp.down_proj": 5 } }, { "memory": 1248794752, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 5, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 5 } }, { "memory": 1251219584, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 5, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 5 } }, { "memory": 1251874944, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 5, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 5 } }, { "memory": 1254299776, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 5, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 5 } }, { "memory": 1256724608, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 5, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 5 } }, { "memory": 1259149440, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 5, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 5 } }, { "memory": 1261574272, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 5 } }, { "memory": 1263999104, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 5, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1266423936, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 5, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1268848768, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 5, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1271273600, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 5, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1273698432, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 5, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1274353792, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 5, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1276778624, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 5, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1279203456, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 5, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1279416448, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 5, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1281841280, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 5, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1284266112, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 5, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1286690944, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 5, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1289115776, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 5, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1291540608, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 5, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1292195968, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 5, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1292851328, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 5, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1295276160, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 5, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1297700992, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 5, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1300125824, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 5, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1302550656, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 5, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1303206016, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 5, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1305630848, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 5, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1308055680, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 5, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1310480512, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 5, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1312905344, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 5, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1315330176, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 5, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1315985536, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 5, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1316198528, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 5, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1318623360, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 5, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1318836352, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 5, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1321261184, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 5, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1321916544, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 5, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1322129536, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 5, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1322784896, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 5, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1322997888, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 5, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1325422720, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 5, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1325635712, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 5, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1328060544, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 5, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1328273536, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 5, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1328928896, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 5, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1329584256, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 5, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1330239616, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 5, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1332664448, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 5, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1335089280, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 5, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1337514112, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 5, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1338169472, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 5, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1338824832, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 5, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1339037824, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 5, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1341462656, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 5, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1343887488, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 5, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1344100480, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 5, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1346525312, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 5, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1346738304, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 5, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1349163136, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 5, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1349818496, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 5, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1352243328, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 5, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1354668160, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 5, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1357092992, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 5, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1357305984, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 5, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1359730816, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 5, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1359943808, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 5, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1360599168, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 5, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1361254528, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 5, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1361909888, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 5, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1362122880, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 5, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1362335872, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 5, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1362548864, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 5, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1362761856, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 5, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1363417216, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 5, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1363630208, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 5, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1363843200, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 5, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1366268032, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 5, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1366923392, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 5, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1367136384, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 5, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1367349376, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 5, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1367562368, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 5, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1369987200, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 5, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1370642560, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 5, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1370855552, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 5, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1373280384, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 5, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1373935744, "layers": { "layer.0.self_attn.q_proj": 5, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1374591104, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 5, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1374804096, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 5, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1375459456, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 5, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1377884288, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 5, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1378539648, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 5, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1379195008, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 5, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1381619840, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 5, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1382275200, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 5, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1382488192, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 5, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1383143552, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 5, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1383798912, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 5, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1384454272, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 5, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1385109632, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 5, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1385322624, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 5, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1385535616, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 5, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1385748608, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 5, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1386403968, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 5, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1386616960, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 5, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1386829952, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 5, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1387485312, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 5, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1389910144, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 5, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1390123136, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 5, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1390336128, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 5, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1390549120, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 6, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1392973952, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 6, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1395398784, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 6, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1396054144, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 6, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1398478976, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 6, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1399134336, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 6, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1401559168, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 6, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1403984000, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 6 } }, { "memory": 1406408832, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 6, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 7 } }, { "memory": 1408833664, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 6, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 7 } }, { "memory": 1411258496, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 6, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 7 } }, { "memory": 1413683328, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 6, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 7 } }, { "memory": 1416108160, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 6, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 7 } }, { "memory": 1418532992, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 6, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 7 } }, { "memory": 1420957824, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 6, "layer.15.mlp.down_proj": 7 } }, { "memory": 1423382656, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 6, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1425807488, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 6, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1428232320, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 6, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1430657152, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 6, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1433081984, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 6, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1435506816, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 6, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1435719808, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 6, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1438144640, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 6, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1440569472, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 6, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1440782464, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 6, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1443207296, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 6, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1445632128, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 6, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1448056960, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 6, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1450481792, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 6, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1451137152, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 6, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1453561984, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 6, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1455986816, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 6, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1458411648, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 6, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1460836480, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 6, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1463261312, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 6, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1465686144, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 6, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1468110976, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 6, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1470535808, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 6, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1472960640, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 6, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1475385472, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 6, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1477810304, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 6, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1480235136, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 6, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1480890496, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 6, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1483315328, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 6, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1483528320, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 6, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1483741312, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 6, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1486166144, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 6, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1486821504, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 6, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1489246336, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 6, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1489901696, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 6, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1490114688, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 6, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1490327680, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 6, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1492752512, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 6, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1492965504, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 6, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1493620864, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 6, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1493833856, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 6, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1496258688, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 6, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1498683520, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 6, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1501108352, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 6, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1503533184, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 6, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1504188544, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 6, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1504843904, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 6, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1505056896, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 6, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1505712256, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 6, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1505925248, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 6, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1506580608, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 6, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1507235968, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 6, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1507891328, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 6, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1508104320, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 6, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1508759680, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 6, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1508972672, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 6, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1509628032, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 6, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1512052864, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 6, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1512708224, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 6, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1515133056, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 6, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1515788416, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 6, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1516443776, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 6, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1516656768, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 6, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1519081600, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 6, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1519736960, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 6, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1520392320, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 6, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1521047680, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 6, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1521703040, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 6, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1522358400, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 6, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1523013760, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 6, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1523669120, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 6, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1523882112, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 6, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1524537472, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 6, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1524750464, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 6, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1524963456, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 6, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1525176448, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 6, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1525831808, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 6, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1526044800, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 6, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1526257792, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 6, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1526470784, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 6, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1527126144, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 6, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1527781504, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 6, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1527994496, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 6, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1528649856, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 6, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1529305216, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 6, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1529518208, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 6, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1529731200, "layers": { "layer.0.self_attn.q_proj": 6, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1530386560, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 6, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1530599552, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 6, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1530812544, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 6, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1533237376, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 6, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1533450368, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 6, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1533663360, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 6, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1533876352, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 6, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1534089344, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 6, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1534302336, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 6, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1534515328, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 6, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1534728320, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 7, "layer.15.mlp.down_proj": 7 } }, { "memory": 1537153152, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 7, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 7 } }, { "memory": 1539577984, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 7 } }, { "memory": 1542002816, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 7, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1544427648, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 7, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1546852480, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 7, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1549277312, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 7, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1551702144, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 7, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1552357504, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 7, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1554782336, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 7, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1557207168, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 7, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1559632000, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 7, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1562056832, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 7, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1564481664, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 7, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1566906496, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 7, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1569331328, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 7, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1571756160, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 7, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1572411520, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 7, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1574836352, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 7, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1577261184, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 7, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1579686016, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 7, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1582110848, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 7, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1584535680, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 7, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1586960512, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 7, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1587173504, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 7, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1589598336, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 7, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1590253696, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 7, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1592678528, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 7, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1593333888, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 7, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1595758720, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 7, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1595971712, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 7, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1598396544, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 7, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1600821376, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 7, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1603246208, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 7, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1605671040, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 7, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1605884032, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 7, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1606539392, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 7, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1608964224, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 7, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1611389056, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 7, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1611602048, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 7, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1614026880, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 7, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1616451712, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 7, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1618876544, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 7, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1619531904, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 7, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1621956736, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 7, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1622612096, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 7, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1623267456, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 7, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1623922816, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 7, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1624578176, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 7, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1627003008, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 7, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1627658368, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 7, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1627871360, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 7, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1628084352, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 7, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1628297344, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 7, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1628952704, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 7, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1631377536, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 7, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1633802368, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 7, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1634015360, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 7, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1634228352, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 7, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1636653184, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 7, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1639078016, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 7, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1639291008, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 7, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1639504000, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 7, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1639716992, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 7, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1640372352, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 7, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1642797184, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 7, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1645222016, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 7, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1645877376, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 7, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1646090368, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 7, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1646745728, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 7, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1649170560, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 7, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1649825920, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 7, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1650038912, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 7, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1650251904, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 7, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1650464896, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 7, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1650677888, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 7, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1651333248, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 7, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1653758080, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 7, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1654413440, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 7, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1655068800, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 7, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1655281792, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 7, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1657706624, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 7, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1657919616, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 7, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1658132608, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 7, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1658787968, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 7, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1659000960, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 7, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1659213952, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 7, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1659869312, "layers": { "layer.0.self_attn.q_proj": 7, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1660524672, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 7, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1661180032, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 7, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1661393024, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 7, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1661606016, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 7, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1662261376, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 7, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1662916736, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 7, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1663129728, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 7, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1663342720, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 7, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1665767552, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 7, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1666422912, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 7, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1667078272, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 7, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1667733632, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 7, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1667946624, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 7, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1668601984, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 7, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1669257344, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 7, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1669470336, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 7, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1669683328, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 7, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1672108160, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 7, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1672321152, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 7, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1674745984, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 7, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1675401344, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 7, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1675614336, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 7, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1678039168, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 7, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1678694528, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 7, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 8, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1678907520, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 8, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 8, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1681332352, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 8, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 8 } }, { "memory": 1683757184, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 8, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 8, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1686182016, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 8, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1686837376, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 8, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1689262208, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 8, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1691687040, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 8, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1694111872, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 8, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1696536704, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 8, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1696749696, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 8, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1697405056, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 8, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1699829888, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 8, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1702254720, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 8, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1704679552, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 8, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1707104384, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 8, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1709529216, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 8, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1711954048, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 8, "layer.15.mlp.down_proj": 9 } }, { "memory": 1714378880, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 8, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1716803712, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 8, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1719228544, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 8, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1721653376, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 8, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1724078208, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 8, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1726503040, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 8, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1728927872, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 8, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1729140864, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 8, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1729796224, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 8, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1732221056, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 8, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1734645888, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 8, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1737070720, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 8, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1737726080, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 8, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1740150912, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 8, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1742575744, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 8, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1745000576, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 8, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1747425408, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 8, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1749850240, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 8, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1750063232, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 8, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1752488064, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 8, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1754912896, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 8, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1757337728, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 8, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1759762560, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 8, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1762187392, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 8, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1762400384, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 8, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1764825216, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 8, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1765480576, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 8, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1766135936, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 8, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1766791296, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 8, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1769216128, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 8, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1771640960, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 8, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1771853952, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 8, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1774278784, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 8, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1774934144, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 8, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1777358976, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 8, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1779783808, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 8, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1779996800, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 8, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1780209792, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 8, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1780865152, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 8, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1781078144, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 8, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1783502976, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 8, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1785927808, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 8, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1786583168, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 8, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1789008000, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 8, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1789220992, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 8, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1789433984, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 8, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1789646976, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 8, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1789859968, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 8, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1790072960, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 8, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1790728320, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 8, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1793153152, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 8, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1793808512, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 8, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1796233344, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 8, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1796888704, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 8, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1797101696, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 8, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1799526528, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 8, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1801951360, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 8, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1802606720, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 8, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1803262080, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 8, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1803475072, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 8, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1804130432, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 8, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1804343424, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 8, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1804998784, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 8, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1805654144, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 8, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1808078976, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 8, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1808291968, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 8, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1810716800, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 8, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1811372160, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 8, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1811585152, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 8, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1811798144, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 8, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1812011136, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 8, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1812666496, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 8, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1813321856, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 8, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1813977216, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 8, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1814632576, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 8, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1815287936, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 8, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1815500928, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 8, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1815713920, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 8, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1816369280, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 8, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1817024640, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 8, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1817680000, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 8, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1818335360, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 8, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1818548352, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 8, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1819203712, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 8, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1819859072, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 8, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1820072064, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 8, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1820285056, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 8, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1820940416, "layers": { "layer.0.self_attn.q_proj": 8, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1821595776, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 8, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1821808768, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 8, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1822021760, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 8, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1822234752, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 8, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1822447744, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 8, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1822660736, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 8, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1822873728, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 9, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 8, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1823086720, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 9, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 9, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1823299712, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 9, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 9, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1825724544, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 9, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 9, "layer.15.mlp.down_proj": 9 } }, { "memory": 1828149376, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 9, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 9, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 9 } }, { "memory": 1830574208, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 9, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 9 } }, { "memory": 1832999040, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 9, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1833212032, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 9, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1835636864, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 9, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1838061696, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 9, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1838717056, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 9, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1841141888, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 9, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1843566720, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 9, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1845991552, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 9, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1848416384, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 9, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1850841216, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 9, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1853266048, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 9, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1855690880, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 9, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1855903872, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 9, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1856559232, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 9, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1858984064, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 9, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1861408896, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 9, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1863833728, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 9, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1866258560, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 9, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1868683392, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 9, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1871108224, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 9, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1873533056, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 9, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1875957888, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 9, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1876613248, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 9, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1879038080, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 9, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1879693440, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 9, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1882118272, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 9, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1884543104, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 9, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1886967936, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 9, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1889392768, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 9, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1891817600, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 9, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1892030592, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 9, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1894455424, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 9, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1896880256, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 9, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1897093248, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 9, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1897306240, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 9, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1899731072, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 9, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1902155904, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 9, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1904580736, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 9, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1907005568, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 9, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1907660928, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 9, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1910085760, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 9, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1910298752, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 9, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1912723584, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 9, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1913378944, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 9, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1914034304, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 9, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1916459136, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 9, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1918883968, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 9, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1919539328, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 9, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1920194688, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 9, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1922619520, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 9, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1925044352, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 9, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1927469184, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 9, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1927682176, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 9, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1928337536, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 9, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1928992896, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 9, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1929648256, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 9, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1930303616, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 9, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1932728448, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 9, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1932941440, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 9, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1933596800, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 9, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1936021632, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 9, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1936676992, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 9, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1936889984, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 9, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1937545344, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 9, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1939970176, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 9, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1940183168, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 9, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1940396160, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 9, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1941051520, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 9, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1943476352, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 9, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1944131712, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 9, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1944787072, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 9, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1945000064, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 9, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1945213056, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 9, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1947637888, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 9, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1948293248, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 9, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1948506240, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 9, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1949161600, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 9, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1949374592, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 9, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1949587584, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 9, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1949800576, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 9, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1950013568, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 9, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1950668928, "layers": { "layer.0.self_attn.q_proj": 9, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1951324288, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 9, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1951537280, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 9, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1952192640, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 9, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1952848000, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 9, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1953503360, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 9, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1954158720, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 9, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1954371712, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 9, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1955027072, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 9, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1955682432, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 9, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1956337792, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 9, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1956993152, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 9, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1959417984, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 9, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1959630976, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 9, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1959843968, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 9, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1960056960, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 9, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1960269952, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 9, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1960482944, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 9, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1960695936, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 9, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1963120768, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 9, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1963776128, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 9, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1966200960, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 9, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1966413952, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 9, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1966626944, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 9, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1966839936, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 9, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1967052928, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 9, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1967265920, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 10, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 10, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1969690752, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 10, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 10, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1972115584, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 10, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 10 } }, { "memory": 1974540416, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 10, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 10, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 11 } }, { "memory": 1974753408, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 10, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 10, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 11 } }, { "memory": 1977178240, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 10, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 11 } }, { "memory": 1977391232, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 10, "layer.15.mlp.down_proj": 11 } }, { "memory": 1979816064, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 10, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1982240896, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 10, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1984665728, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 10, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1984878720, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 10, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1987303552, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 10, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1989728384, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 10, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1992153216, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 10, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1994578048, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 10, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1997002880, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 10, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 1999427712, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 10, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2001852544, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 10, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2004277376, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 10, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2006702208, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 10, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2009127040, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 10, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2011551872, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 10, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2013976704, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 10, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2014632064, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 10, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2015287424, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 10, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2017712256, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 10, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2020137088, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 10, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2022561920, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 10, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2023217280, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 10, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2025642112, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 10, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2028066944, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 10, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2030491776, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 10, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2030704768, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 10, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2033129600, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 10, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2033784960, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 10, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2036209792, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 10, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2038634624, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 10, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2041059456, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 10, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2043484288, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 10, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2045909120, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 10, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2046122112, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 10, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2048546944, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 10, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2050971776, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 10, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2053396608, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 10, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2054051968, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 10, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2054264960, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 10, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2054477952, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 10, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2055133312, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 10, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2057558144, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 10, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2058213504, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 10, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2060638336, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 10, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2061293696, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 10, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2061949056, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 10, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2062604416, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 10, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2065029248, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 10, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2067454080, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 10, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2068109440, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 10, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2070534272, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 10, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2070747264, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 10, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2070960256, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 10, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2073385088, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 10, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2075809920, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 10, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2078234752, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 10, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2078890112, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 10, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2079103104, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 10, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2079758464, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 10, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2079971456, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 10, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2080626816, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 10, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2081282176, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 10, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2081495168, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 10, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2081708160, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 10, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2081921152, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 10, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2082134144, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 10, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2082347136, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 10, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2082560128, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 10, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2082773120, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 10, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2082986112, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 10, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2083641472, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 10, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2083854464, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 10, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2086279296, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 10, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2086934656, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 10, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2087590016, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 10, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2090014848, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 10, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2090670208, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 10, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2093095040, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 10, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2093750400, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 10, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2094405760, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 10, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2095061120, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 10, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2097485952, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 10, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2097698944, "layers": { "layer.0.self_attn.q_proj": 10, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2098354304, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 10, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2098567296, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 10, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2098780288, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 10, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2099435648, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 10, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2099648640, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 10, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2099861632, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 10, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2100516992, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 10, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2101172352, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 10, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2101827712, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 10, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2102040704, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 10, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2102696064, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 10, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2105120896, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 10, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2105776256, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 10, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2105989248, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 10, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2106202240, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 10, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2106857600, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 10, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2107070592, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 10, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2107283584, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 10, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2109708416, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 10, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2109921408, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 10, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2110134400, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 10, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2110789760, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 10, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2111445120, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 11, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2113869952, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 11, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2114082944, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 11, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2116507776, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 11, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2118932608, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 11, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2121357440, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 11, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2123782272, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 11, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2126207104, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 11, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2128631936, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 11, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2131056768, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 11, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2131712128, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 11, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2134136960, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 11, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2136561792, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 11, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2138986624, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 11, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2139199616, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 11, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2141624448, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 11 } }, { "memory": 2144049280, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 11, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2146474112, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 11, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2148898944, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 11, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2151323776, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 11, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2151979136, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 11, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2154403968, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 11, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2154616960, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 11, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2155272320, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 11, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2157697152, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 11, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2160121984, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 11, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2162546816, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 11, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2164971648, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 11, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2167396480, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 11, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2169821312, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 11, "layer.15.mlp.down_proj": 12 } }, { "memory": 2172246144, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 11, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2172901504, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 11, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2175326336, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 11, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2175981696, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 11, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2176637056, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 11, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2179061888, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 11, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2179717248, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 11, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2180372608, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 11, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2180585600, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 11, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2183010432, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 11, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2183665792, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 11, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2184321152, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 11, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2186745984, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 11, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2186958976, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 11, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2189383808, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 11, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2189596800, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 11, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2192021632, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 11, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2194446464, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 11, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2196871296, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 11, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2199296128, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 11, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2201720960, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 11, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2204145792, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 11, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2206570624, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 11, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2208995456, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 11, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2209208448, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 11, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2209421440, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 11, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2209634432, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 11, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2209847424, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 11, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2212272256, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 11, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2212485248, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 11, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2213140608, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 11, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2215565440, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 11, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2217990272, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 11, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2218203264, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 11, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2220628096, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 11, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2221283456, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 11, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2221496448, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 11, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2221709440, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 11, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2222364800, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 11, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2223020160, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 11, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2223675520, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 11, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2223888512, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 11, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2224101504, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 11, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2224756864, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 11, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2227181696, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 11, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2227837056, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 11, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2228492416, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 11, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2229147776, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 11, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2229803136, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 11, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2230458496, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 11, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2231113856, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 11, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2231326848, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 11, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2231982208, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 11, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2232637568, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 11, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2232850560, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 11, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2235275392, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 11, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2235930752, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 11, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2238355584, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 11, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2240780416, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 11, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2240993408, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 11, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2241206400, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 11, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2241861760, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 11, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2244286592, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 11, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2244499584, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 11, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2244712576, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 11, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2245367936, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 11, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2247792768, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 11, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2248005760, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 11, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2248661120, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 11, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2248874112, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 11, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2249087104, "layers": { "layer.0.self_attn.q_proj": 11, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2249742464, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 11, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2250397824, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 11, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2250610816, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 11, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2251266176, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 11, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2253691008, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 11, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2253904000, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 11, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2254116992, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 11, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2254329984, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 11, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2254542976, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 11, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2254755968, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 11, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2255411328, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 11, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2255624320, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 12, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 12, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2258049152, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 12, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2258262144, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 12, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2260686976, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 12 } }, { "memory": 2263111808, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 12, "layer.15.mlp.down_proj": 13 } }, { "memory": 2265536640, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 12, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2267961472, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 12, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2270386304, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 12, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2272811136, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 12, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2273024128, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 12, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2275448960, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 12, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2277873792, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 12, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2280298624, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 12, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2282723456, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 12, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2285148288, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 12, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2287573120, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 12, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2289997952, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 12, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2290653312, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 12, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2293078144, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 12, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2293733504, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 12, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2296158336, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 12, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2298583168, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 12, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2298796160, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 12, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2301220992, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 12, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2303645824, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 12, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2306070656, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 12, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2308495488, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 12, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2310920320, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 12, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2313345152, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 12, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2314000512, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 12, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2316425344, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 12, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2318850176, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 12, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2321275008, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 12, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2321930368, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 12, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2322143360, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 12, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2324568192, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 12, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2324781184, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 12, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2327206016, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 12, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2327419008, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 12, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2329843840, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 12, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2330499200, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 12, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2332924032, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 12, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2333579392, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 12, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2333792384, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 12, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2336217216, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 12, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2336872576, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 12, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2337527936, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 12, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2338183296, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 12, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2338838656, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 12, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2341263488, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 12, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2341476480, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 12, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2343901312, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 12, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2344556672, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 12, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2344769664, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 12, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2345425024, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 12, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2347849856, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 12, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2348062848, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 12, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2348718208, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 12, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2349373568, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 12, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2350028928, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 12, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2352453760, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 12, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2354878592, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 12, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2355091584, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 12, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2357516416, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 12, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2357729408, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 12, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2357942400, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 12, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2358597760, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 12, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2359253120, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 12, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2359466112, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 12, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2361890944, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 12, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2362103936, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 12, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2362316928, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 12, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2364741760, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 12, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2365397120, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 12, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2366052480, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 12, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2366265472, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 12, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2368690304, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 12, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2371115136, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 12, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2371770496, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 12, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2371983488, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 12, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2374408320, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 12, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2374621312, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 12, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2377046144, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 12, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2377701504, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 12, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2380126336, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 12, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2380781696, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 12, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2380994688, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 12, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2381650048, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 12, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2382305408, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 12, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2382518400, "layers": { "layer.0.self_attn.q_proj": 12, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2383173760, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 12, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2383829120, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 12, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2384484480, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 12, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2384697472, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 12, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2384910464, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 12, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2385565824, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 12, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2387990656, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 12, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2388203648, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 12, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2388416640, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 12, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2388629632, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 12, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2389284992, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 12, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2389940352, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 12, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2392365184, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 12, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2393020544, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 12, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2395445376, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 12, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2395658368, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 12, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2395871360, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 12, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2396084352, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 12, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2396297344, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 12, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2398722176, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 12, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 13, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2398935168, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 12, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 13, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2399148160, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 13, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 12, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2399803520, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 13, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 13, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2402228352, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 13, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 13, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2404653184, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 13, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 13, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2407078016, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 13, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 13, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2407733376, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 13, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2410158208, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 13, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2412583040, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 13, "layer.15.mlp.down_proj": 13 } }, { "memory": 2415007872, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 13, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2417432704, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 13, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2419857536, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 13, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2422282368, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 13, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2424707200, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 13, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2427132032, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 13, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2429556864, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 13, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2429769856, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 13, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2432194688, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 13, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2434619520, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 13, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2437044352, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 13, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2439469184, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 13, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2441894016, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 13, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2444318848, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 13, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2444974208, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 13, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2447399040, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 13, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2449823872, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 13 } }, { "memory": 2452248704, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 13, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2454673536, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 13, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2457098368, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 13, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2459523200, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 13, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2460178560, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 13, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2460833920, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 13, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2461046912, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 13, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2461259904, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 13, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2463684736, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 13, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2464340096, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 13, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2464995456, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 13, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2467420288, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 13, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2469845120, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 13, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2472269952, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 13, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2472482944, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 13, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2473138304, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 13, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2475563136, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 13, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2477987968, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 13, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2480412800, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 13, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2482837632, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 13, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2483492992, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 13, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2483705984, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 13, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2483918976, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 13, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2486343808, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 13, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2486999168, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 13, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2487654528, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 13, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2487867520, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 13, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2488522880, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 13, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2488735872, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 13, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2488948864, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 13, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2491373696, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 13, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2493798528, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 13, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2496223360, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 13, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2496436352, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 13, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2497091712, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 13, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2497304704, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 13, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2499729536, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 13, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2499942528, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 13, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2502367360, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 13, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2503022720, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 13, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2503235712, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 13, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2505660544, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 13, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2506315904, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 13, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2506971264, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 13, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2507184256, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 13, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2507839616, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 13, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2508052608, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 13, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2508265600, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 13, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2510690432, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 13, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2510903424, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 13, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2511558784, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 13, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2511771776, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 13, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2512427136, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 13, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2513082496, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 13, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2513295488, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 13, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2513950848, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 13, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2516375680, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 13, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2518800512, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 13, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2519455872, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 13, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2519668864, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 13, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2520324224, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 13, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2520537216, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 13, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2521192576, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 13, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2521847936, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 13, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2522503296, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 13, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2524928128, "layers": { "layer.0.self_attn.q_proj": 13, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2525583488, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 13, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2525796480, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 13, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2528221312, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 13, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2528876672, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 13, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2529532032, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 13, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2531956864, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 13, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2532612224, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 13, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2532825216, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 13, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2533038208, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 13, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2533251200, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 13, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2533464192, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 13, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2534119552, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 13, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2534332544, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 13, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2534545536, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 13, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2535200896, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 13, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2535413888, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 13, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2537838720, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 13, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2538051712, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 13, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2538264704, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 13, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2538920064, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 13, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2541344896, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 13, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2543769728, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 14, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 13, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2543982720, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 14, "layer.1.mlp.down_proj": 14, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2546407552, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 14, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 14, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2548832384, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 14, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2551257216, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 14 } }, { "memory": 2553682048, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 14, "layer.15.mlp.down_proj": 15 } }, { "memory": 2556106880, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 14, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2556319872, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 14, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2558744704, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 14, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2561169536, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 14, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2563594368, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 14, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2566019200, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 14, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2568444032, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 14, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2569099392, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 14, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2571524224, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 14, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2573949056, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 14, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2576373888, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 14, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2577029248, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 14, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2579454080, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 14, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2581878912, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 14, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2584303744, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 14, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2584516736, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 14, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2584729728, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 14, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2587154560, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 14, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2589579392, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 14, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2592004224, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 14, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2594429056, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 14, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2595084416, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 14, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2597509248, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 14, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2599934080, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 14, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2600589440, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 14, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2603014272, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 14, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2605439104, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 14, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2607863936, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 14, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2608076928, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 14, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2608732288, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 14, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2611157120, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 14, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2611812480, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 14, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2612025472, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 14, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2614450304, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 14, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2616875136, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 14, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2617088128, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 14, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2617743488, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 14, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2618398848, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 14, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2620823680, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 14, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2623248512, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 14, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2623903872, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 14, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2624116864, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 14, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2624329856, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 14, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2624542848, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 14, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2625198208, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 14, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2625411200, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 14, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2626066560, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 14, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2626721920, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 14, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2627377280, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 14, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2629802112, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 14, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2632226944, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 14, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2632439936, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 14, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2633095296, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 14, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2635520128, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 14, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2636175488, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 14, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2638600320, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 14, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2641025152, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 14, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2641238144, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 14, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2641451136, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 14, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2642106496, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 14, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2642761856, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 14, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2643417216, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 14, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2645842048, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 14, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2646055040, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 14, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2648479872, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 14, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2648692864, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 14, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2648905856, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 14, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2649561216, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 14, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2649774208, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 14, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2650429568, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 14, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2650642560, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 14, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2650855552, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 14, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2653280384, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 14, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2653493376, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 14, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2653706368, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 14, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2654361728, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 14, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2656786560, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 14, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2657441920, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 14, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2657654912, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 14, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2660079744, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 14, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2662504576, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 14, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2663159936, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 14, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2663372928, "layers": { "layer.0.self_attn.q_proj": 14, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2664028288, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 14, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2664241280, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 14, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2664896640, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 14, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2665109632, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 14, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2665322624, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 14, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2667747456, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 14, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2670172288, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 14, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2672597120, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 14, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2673252480, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 14, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2673907840, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 14, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2674563200, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 14, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2675218560, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 14, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2675431552, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 14, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2676086912, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 14, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2678511744, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 14, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2680936576, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 14, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2683361408, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 14, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2684016768, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 14, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2684229760, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 14, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2684442752, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 14, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2686867584, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 14, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2687080576, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 14, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2687293568, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 14, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2687948928, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 15, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 14, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2688161920, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 15, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 15, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2690586752, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 15, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 15, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2693011584, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 15, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2693666944, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 15, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2696091776, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 15, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2698516608, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 15, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2700941440, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 15, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2703366272, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 15, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2705791104, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 15, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2708215936, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 15, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2708428928, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 15, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2710853760, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 15, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2713278592, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 15, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2713933952, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 15, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2716358784, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 15, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2718783616, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 15, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2721208448, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 15 } }, { "memory": 2723633280, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 15, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2723846272, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 15, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2726271104, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 15, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2728695936, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 15, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2731120768, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 15, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2733545600, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 15, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2735970432, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 15, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2738395264, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 15, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2740820096, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 15, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2741033088, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 15, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2741688448, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 15, "layer.15.mlp.down_proj": 16 } }, { "memory": 2744113280, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 15, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2746538112, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 15, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2748962944, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 15, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2751387776, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 15, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2753812608, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 15, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2756237440, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 15, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2756450432, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 15, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2756663424, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 15, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2759088256, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 15, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2759743616, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 15, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2762168448, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 15, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2762381440, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 15, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2763036800, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 15, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2765461632, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 15, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2767886464, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 15, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2770311296, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 15, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2772736128, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 15, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2775160960, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 15, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2777585792, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 15, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2780010624, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 15, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2780665984, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 15, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2781321344, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 15, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2783746176, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 15, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2783959168, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 15, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2786384000, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 15, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2788808832, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 15, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2789021824, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 15, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2791446656, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 15, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2791659648, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 15, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2792315008, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 15, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2792528000, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 15, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2794952832, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 15, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2795165824, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 15, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2795378816, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 15, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2796034176, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 15, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2796689536, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 15, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2799114368, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 15, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2799327360, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 15, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2799982720, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 15, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2800638080, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 15, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2801293440, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 15, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2801506432, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 15, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2801719424, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 15, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2802374784, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 15, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2803030144, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 15, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2805454976, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 15, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2805667968, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 15, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2806323328, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 15, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2806536320, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 15, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2807191680, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 15, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2809616512, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 15, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2809829504, "layers": { "layer.0.self_attn.q_proj": 15, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2810484864, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 15, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2810697856, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 15, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2811353216, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 15, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2812008576, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 15, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2812221568, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 15, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2812434560, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 15, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2813089920, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 15, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2813302912, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 15, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2813958272, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 15, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2814613632, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 15, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2815268992, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 15, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2815481984, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 15, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2817906816, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 15, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2818562176, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 15, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2818775168, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 15, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2819430528, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 15, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2820085888, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 15, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2820298880, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 15, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2820511872, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 15, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2821167232, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 15, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2821380224, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 15, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2822035584, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 15, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2822248576, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 15, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2822461568, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 15, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2823116928, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 15, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2823329920, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 15, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 16, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2823985280, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 16, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 15, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 16, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2826410112, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 16, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 16, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 15, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 16, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2826623104, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 15, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 16, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 16, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 16, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 16, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2826836096, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 16, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 16, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 16, "layer.7.mlp.down_proj": 15, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 16, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 16, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2829260928, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 16, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 16, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 16, "layer.7.mlp.down_proj": 16, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 15, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 16, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 16, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2829916288, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 15, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 16, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 16, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 16, "layer.7.mlp.down_proj": 16, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 16, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 16, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 16, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } }, { "memory": 2832341120, "layers": { "layer.0.self_attn.q_proj": 16, "layer.0.self_attn.k_proj": 16, "layer.0.self_attn.v_proj": 16, "layer.0.self_attn.o_proj": 16, "layer.0.mlp.gate_proj": 16, "layer.0.mlp.up_proj": 16, "layer.0.mlp.down_proj": 16, "layer.1.self_attn.q_proj": 16, "layer.1.self_attn.k_proj": 16, "layer.1.self_attn.v_proj": 16, "layer.1.self_attn.o_proj": 16, "layer.1.mlp.gate_proj": 16, "layer.1.mlp.up_proj": 16, "layer.1.mlp.down_proj": 16, "layer.2.self_attn.q_proj": 16, "layer.2.self_attn.k_proj": 16, "layer.2.self_attn.v_proj": 16, "layer.2.self_attn.o_proj": 16, "layer.2.mlp.gate_proj": 16, "layer.2.mlp.up_proj": 16, "layer.2.mlp.down_proj": 16, "layer.3.self_attn.q_proj": 16, "layer.3.self_attn.k_proj": 16, "layer.3.self_attn.v_proj": 16, "layer.3.self_attn.o_proj": 16, "layer.3.mlp.gate_proj": 16, "layer.3.mlp.up_proj": 16, "layer.3.mlp.down_proj": 16, "layer.4.self_attn.q_proj": 16, "layer.4.self_attn.k_proj": 16, "layer.4.self_attn.v_proj": 16, "layer.4.self_attn.o_proj": 16, "layer.4.mlp.gate_proj": 16, "layer.4.mlp.up_proj": 16, "layer.4.mlp.down_proj": 16, "layer.5.self_attn.q_proj": 16, "layer.5.self_attn.k_proj": 16, "layer.5.self_attn.v_proj": 16, "layer.5.self_attn.o_proj": 16, "layer.5.mlp.gate_proj": 16, "layer.5.mlp.up_proj": 16, "layer.5.mlp.down_proj": 16, "layer.6.self_attn.q_proj": 16, "layer.6.self_attn.k_proj": 16, "layer.6.self_attn.v_proj": 16, "layer.6.self_attn.o_proj": 16, "layer.6.mlp.gate_proj": 16, "layer.6.mlp.up_proj": 16, "layer.6.mlp.down_proj": 16, "layer.7.self_attn.q_proj": 16, "layer.7.self_attn.k_proj": 16, "layer.7.self_attn.v_proj": 16, "layer.7.self_attn.o_proj": 16, "layer.7.mlp.gate_proj": 16, "layer.7.mlp.up_proj": 16, "layer.7.mlp.down_proj": 16, "layer.8.self_attn.q_proj": 16, "layer.8.self_attn.k_proj": 16, "layer.8.self_attn.v_proj": 16, "layer.8.self_attn.o_proj": 16, "layer.8.mlp.gate_proj": 16, "layer.8.mlp.up_proj": 16, "layer.8.mlp.down_proj": 16, "layer.9.self_attn.q_proj": 16, "layer.9.self_attn.k_proj": 16, "layer.9.self_attn.v_proj": 16, "layer.9.self_attn.o_proj": 16, "layer.9.mlp.gate_proj": 16, "layer.9.mlp.up_proj": 16, "layer.9.mlp.down_proj": 16, "layer.10.self_attn.q_proj": 16, "layer.10.self_attn.k_proj": 16, "layer.10.self_attn.v_proj": 16, "layer.10.self_attn.o_proj": 16, "layer.10.mlp.gate_proj": 16, "layer.10.mlp.up_proj": 16, "layer.10.mlp.down_proj": 16, "layer.11.self_attn.q_proj": 16, "layer.11.self_attn.k_proj": 16, "layer.11.self_attn.v_proj": 16, "layer.11.self_attn.o_proj": 16, "layer.11.mlp.gate_proj": 16, "layer.11.mlp.up_proj": 16, "layer.11.mlp.down_proj": 16, "layer.12.self_attn.q_proj": 16, "layer.12.self_attn.k_proj": 16, "layer.12.self_attn.v_proj": 16, "layer.12.self_attn.o_proj": 16, "layer.12.mlp.gate_proj": 16, "layer.12.mlp.up_proj": 16, "layer.12.mlp.down_proj": 16, "layer.13.self_attn.q_proj": 16, "layer.13.self_attn.k_proj": 16, "layer.13.self_attn.v_proj": 16, "layer.13.self_attn.o_proj": 16, "layer.13.mlp.gate_proj": 16, "layer.13.mlp.up_proj": 16, "layer.13.mlp.down_proj": 16, "layer.14.self_attn.q_proj": 16, "layer.14.self_attn.k_proj": 16, "layer.14.self_attn.v_proj": 16, "layer.14.self_attn.o_proj": 16, "layer.14.mlp.gate_proj": 16, "layer.14.mlp.up_proj": 16, "layer.14.mlp.down_proj": 16, "layer.15.self_attn.q_proj": 16, "layer.15.self_attn.k_proj": 16, "layer.15.self_attn.v_proj": 16, "layer.15.self_attn.o_proj": 16, "layer.15.mlp.gate_proj": 16, "layer.15.mlp.up_proj": 16, "layer.15.mlp.down_proj": 16 } } ]