{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 4290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.2524374723434448, "learning_rate": 4.8333333333333334e-05, "loss": 0.305, "step": 143 }, { "epoch": 1.0, "eval_accuracy": 0.9573631188256962, "eval_f1": 0.0, "eval_loss": 0.25014349818229675, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 24.3893, "eval_samples_per_second": 93.361, "eval_steps_per_second": 5.863, "step": 143 }, { "epoch": 2.0, "grad_norm": 2.41591477394104, "learning_rate": 4.666666666666667e-05, "loss": 0.1881, "step": 286 }, { "epoch": 2.0, "eval_accuracy": 0.9643132220795892, "eval_f1": 0.1014260249554367, "eval_loss": 0.18320757150650024, "eval_precision": 0.2123134328358209, "eval_recall": 0.06662763466042154, "eval_runtime": 24.4105, "eval_samples_per_second": 93.28, "eval_steps_per_second": 5.858, "step": 286 }, { "epoch": 3.0, "grad_norm": 1.3990638256072998, "learning_rate": 4.5e-05, "loss": 0.1434, "step": 429 }, { "epoch": 3.0, "eval_accuracy": 0.9702112518836858, "eval_f1": 0.2715324897609878, "eval_loss": 0.13982267677783966, "eval_precision": 0.2840516690113825, "eval_recall": 0.2600702576112412, "eval_runtime": 24.5498, "eval_samples_per_second": 92.75, "eval_steps_per_second": 5.825, "step": 429 }, { "epoch": 4.0, "grad_norm": 1.508366346359253, "learning_rate": 4.3333333333333334e-05, "loss": 0.1107, "step": 572 }, { "epoch": 4.0, "eval_accuracy": 0.977955293854998, "eval_f1": 0.3784165050255687, "eval_loss": 0.09865138679742813, "eval_precision": 0.3799126637554585, "eval_recall": 0.3769320843091335, "eval_runtime": 24.4817, "eval_samples_per_second": 93.008, "eval_steps_per_second": 5.841, "step": 572 }, { "epoch": 5.0, "grad_norm": 2.8642067909240723, "learning_rate": 4.166666666666667e-05, "loss": 0.0877, "step": 715 }, { "epoch": 5.0, "eval_accuracy": 0.9799073505609198, "eval_f1": 0.45260009203865625, "eval_loss": 0.08595172315835953, "eval_precision": 0.4448213478064224, "eval_recall": 0.460655737704918, "eval_runtime": 24.4775, "eval_samples_per_second": 93.024, "eval_steps_per_second": 5.842, "step": 715 }, { "epoch": 6.0, "grad_norm": 1.2604122161865234, "learning_rate": 4e-05, "loss": 0.0736, "step": 858 }, { "epoch": 6.0, "eval_accuracy": 0.9855486409555171, "eval_f1": 0.5592687470889613, "eval_loss": 0.06336811929941177, "eval_precision": 0.5561602593793423, "eval_recall": 0.5624121779859484, "eval_runtime": 24.9031, "eval_samples_per_second": 91.435, "eval_steps_per_second": 5.742, "step": 858 }, { "epoch": 7.0, "grad_norm": 0.9219182729721069, "learning_rate": 3.8333333333333334e-05, "loss": 0.0592, "step": 1001 }, { "epoch": 7.0, "eval_accuracy": 0.9885932354746888, "eval_f1": 0.6266651355075792, "eval_loss": 0.05170835927128792, "eval_precision": 0.614803965750338, "eval_recall": 0.6389929742388759, "eval_runtime": 24.5205, "eval_samples_per_second": 92.861, "eval_steps_per_second": 5.832, "step": 1001 }, { "epoch": 8.0, "grad_norm": 0.5538210868835449, "learning_rate": 3.6666666666666666e-05, "loss": 0.049, "step": 1144 }, { "epoch": 8.0, "eval_accuracy": 0.9903639002065078, "eval_f1": 0.6772266065388951, "eval_loss": 0.040448613464832306, "eval_precision": 0.6529347826086956, "eval_recall": 0.7033957845433255, "eval_runtime": 26.0505, "eval_samples_per_second": 87.407, "eval_steps_per_second": 5.489, "step": 1144 }, { "epoch": 9.0, "grad_norm": 0.5682235956192017, "learning_rate": 3.5e-05, "loss": 0.0404, "step": 1287 }, { "epoch": 9.0, "eval_accuracy": 0.9925099067924318, "eval_f1": 0.7394493342360641, "eval_loss": 0.032923389226198196, "eval_precision": 0.7135235191637631, "eval_recall": 0.7673302107728337, "eval_runtime": 24.6843, "eval_samples_per_second": 92.245, "eval_steps_per_second": 5.793, "step": 1287 }, { "epoch": 10.0, "grad_norm": 2.0670313835144043, "learning_rate": 3.3333333333333335e-05, "loss": 0.0328, "step": 1430 }, { "epoch": 10.0, "eval_accuracy": 0.9937712786738852, "eval_f1": 0.7751125292006154, "eval_loss": 0.026249362155795097, "eval_precision": 0.7548551770058817, "eval_recall": 0.7964871194379392, "eval_runtime": 24.857, "eval_samples_per_second": 91.604, "eval_steps_per_second": 5.753, "step": 1430 }, { "epoch": 11.0, "grad_norm": 2.1008994579315186, "learning_rate": 3.1666666666666666e-05, "loss": 0.0286, "step": 1573 }, { "epoch": 11.0, "eval_accuracy": 0.9950842775018139, "eval_f1": 0.8216382602747535, "eval_loss": 0.020949603989720345, "eval_precision": 0.800510940797512, "eval_recall": 0.8439110070257612, "eval_runtime": 24.6118, "eval_samples_per_second": 92.517, "eval_steps_per_second": 5.81, "step": 1573 }, { "epoch": 12.0, "grad_norm": 0.8632619976997375, "learning_rate": 3e-05, "loss": 0.0229, "step": 1716 }, { "epoch": 12.0, "eval_accuracy": 0.9960051906011052, "eval_f1": 0.8509210825562884, "eval_loss": 0.016843697056174278, "eval_precision": 0.8270335985853228, "eval_recall": 0.8762295081967213, "eval_runtime": 24.6326, "eval_samples_per_second": 92.439, "eval_steps_per_second": 5.805, "step": 1716 }, { "epoch": 13.0, "grad_norm": 1.6470736265182495, "learning_rate": 2.8333333333333335e-05, "loss": 0.0192, "step": 1859 }, { "epoch": 13.0, "eval_accuracy": 0.9966414578333427, "eval_f1": 0.8833197342347593, "eval_loss": 0.014622141607105732, "eval_precision": 0.8793223485727547, "eval_recall": 0.8873536299765808, "eval_runtime": 25.8427, "eval_samples_per_second": 88.11, "eval_steps_per_second": 5.533, "step": 1859 }, { "epoch": 14.0, "grad_norm": 0.4230591058731079, "learning_rate": 2.6666666666666667e-05, "loss": 0.0172, "step": 2002 }, { "epoch": 14.0, "eval_accuracy": 0.9968563375565106, "eval_f1": 0.8954657778288604, "eval_loss": 0.012411631643772125, "eval_precision": 0.8792461347477711, "eval_recall": 0.9122950819672131, "eval_runtime": 24.5899, "eval_samples_per_second": 92.599, "eval_steps_per_second": 5.815, "step": 2002 }, { "epoch": 15.0, "grad_norm": 0.9317820072174072, "learning_rate": 2.5e-05, "loss": 0.0153, "step": 2145 }, { "epoch": 15.0, "eval_accuracy": 0.9975428364123459, "eval_f1": 0.9154970589948168, "eval_loss": 0.010398673824965954, "eval_precision": 0.910670837678137, "eval_recall": 0.9203747072599532, "eval_runtime": 25.4287, "eval_samples_per_second": 89.544, "eval_steps_per_second": 5.624, "step": 2145 }, { "epoch": 16.0, "grad_norm": 0.3844711482524872, "learning_rate": 2.3333333333333336e-05, "loss": 0.0127, "step": 2288 }, { "epoch": 16.0, "eval_accuracy": 0.9975916727130658, "eval_f1": 0.9196624667668478, "eval_loss": 0.009496341459453106, "eval_precision": 0.9080118694362018, "eval_recall": 0.931615925058548, "eval_runtime": 24.6017, "eval_samples_per_second": 92.555, "eval_steps_per_second": 5.813, "step": 2288 }, { "epoch": 17.0, "grad_norm": 0.7824459075927734, "learning_rate": 2.1666666666666667e-05, "loss": 0.0101, "step": 2431 }, { "epoch": 17.0, "eval_accuracy": 0.9979837584417034, "eval_f1": 0.9320265206111271, "eval_loss": 0.00775914778932929, "eval_precision": 0.9180011357183419, "eval_recall": 0.9464871194379391, "eval_runtime": 24.6006, "eval_samples_per_second": 92.559, "eval_steps_per_second": 5.813, "step": 2431 }, { "epoch": 18.0, "grad_norm": 1.2668181657791138, "learning_rate": 2e-05, "loss": 0.0096, "step": 2574 }, { "epoch": 18.0, "eval_accuracy": 0.9981916615504828, "eval_f1": 0.9469846046996181, "eval_loss": 0.006909618154168129, "eval_precision": 0.9362554360265507, "eval_recall": 0.9579625292740047, "eval_runtime": 24.6298, "eval_samples_per_second": 92.449, "eval_steps_per_second": 5.806, "step": 2574 }, { "epoch": 19.0, "grad_norm": 0.8047562837600708, "learning_rate": 1.8333333333333333e-05, "loss": 0.0082, "step": 2717 }, { "epoch": 19.0, "eval_accuracy": 0.9983981693363845, "eval_f1": 0.9509088803982865, "eval_loss": 0.0058281742967665195, "eval_precision": 0.9403480650332036, "eval_recall": 0.9617096018735363, "eval_runtime": 24.6016, "eval_samples_per_second": 92.555, "eval_steps_per_second": 5.813, "step": 2717 }, { "epoch": 20.0, "grad_norm": 0.3952634036540985, "learning_rate": 1.6666666666666667e-05, "loss": 0.0074, "step": 2860 }, { "epoch": 20.0, "eval_accuracy": 0.9985851426019982, "eval_f1": 0.9587371597701816, "eval_loss": 0.005235890857875347, "eval_precision": 0.9504084685306639, "eval_recall": 0.9672131147540983, "eval_runtime": 24.5882, "eval_samples_per_second": 92.605, "eval_steps_per_second": 5.816, "step": 2860 }, { "epoch": 21.0, "grad_norm": 0.20539724826812744, "learning_rate": 1.5e-05, "loss": 0.0065, "step": 3003 }, { "epoch": 21.0, "eval_accuracy": 0.998590723893509, "eval_f1": 0.9577888284954319, "eval_loss": 0.00510649848729372, "eval_precision": 0.9460817911811743, "eval_recall": 0.9697892271662764, "eval_runtime": 24.8469, "eval_samples_per_second": 91.641, "eval_steps_per_second": 5.755, "step": 3003 }, { "epoch": 22.0, "grad_norm": 0.10989252477884293, "learning_rate": 1.3333333333333333e-05, "loss": 0.0058, "step": 3146 }, { "epoch": 22.0, "eval_accuracy": 0.998551654852933, "eval_f1": 0.9594181146452693, "eval_loss": 0.004978466313332319, "eval_precision": 0.9461459637936923, "eval_recall": 0.9730679156908665, "eval_runtime": 24.4805, "eval_samples_per_second": 93.013, "eval_steps_per_second": 5.841, "step": 3146 }, { "epoch": 23.0, "grad_norm": 0.10916499048471451, "learning_rate": 1.1666666666666668e-05, "loss": 0.0056, "step": 3289 }, { "epoch": 23.0, "eval_accuracy": 0.9987204889211363, "eval_f1": 0.965821901883283, "eval_loss": 0.00421318830922246, "eval_precision": 0.958910433979686, "eval_recall": 0.9728337236533958, "eval_runtime": 24.4468, "eval_samples_per_second": 93.141, "eval_steps_per_second": 5.849, "step": 3289 }, { "epoch": 24.0, "grad_norm": 0.3903842270374298, "learning_rate": 1e-05, "loss": 0.0051, "step": 3432 }, { "epoch": 24.0, "eval_accuracy": 0.9987930457107774, "eval_f1": 0.969594986654288, "eval_loss": 0.004073758609592915, "eval_precision": 0.961007591442374, "eval_recall": 0.9783372365339579, "eval_runtime": 24.4468, "eval_samples_per_second": 93.141, "eval_steps_per_second": 5.849, "step": 3432 }, { "epoch": 25.0, "grad_norm": 2.5139732360839844, "learning_rate": 8.333333333333334e-06, "loss": 0.0049, "step": 3575 }, { "epoch": 25.0, "eval_accuracy": 0.9989186247697718, "eval_f1": 0.9717112922002328, "eval_loss": 0.0037488937377929688, "eval_precision": 0.966087962962963, "eval_recall": 0.9774004683840749, "eval_runtime": 24.4794, "eval_samples_per_second": 93.017, "eval_steps_per_second": 5.842, "step": 3575 }, { "epoch": 26.0, "grad_norm": 0.6361393332481384, "learning_rate": 6.666666666666667e-06, "loss": 0.0047, "step": 3718 }, { "epoch": 26.0, "eval_accuracy": 0.9989353686443043, "eval_f1": 0.9728469875305908, "eval_loss": 0.0034835096448659897, "eval_precision": 0.968220830433774, "eval_recall": 0.9775175644028103, "eval_runtime": 24.4891, "eval_samples_per_second": 92.98, "eval_steps_per_second": 5.839, "step": 3718 }, { "epoch": 27.0, "grad_norm": 0.22475773096084595, "learning_rate": 5e-06, "loss": 0.0043, "step": 3861 }, { "epoch": 27.0, "eval_accuracy": 0.9989451359044483, "eval_f1": 0.9760381528440154, "eval_loss": 0.0034792672377079725, "eval_precision": 0.9696094291657037, "eval_recall": 0.9825526932084309, "eval_runtime": 24.4691, "eval_samples_per_second": 93.056, "eval_steps_per_second": 5.844, "step": 3861 }, { "epoch": 28.0, "grad_norm": 0.24321018159389496, "learning_rate": 3.3333333333333333e-06, "loss": 0.0038, "step": 4004 }, { "epoch": 28.0, "eval_accuracy": 0.9989744376848803, "eval_f1": 0.9768117626547335, "eval_loss": 0.003415257204324007, "eval_precision": 0.9696550132687204, "eval_recall": 0.9840749414519906, "eval_runtime": 24.4398, "eval_samples_per_second": 93.168, "eval_steps_per_second": 5.851, "step": 4004 }, { "epoch": 29.0, "grad_norm": 0.5957739949226379, "learning_rate": 1.6666666666666667e-06, "loss": 0.0037, "step": 4147 }, { "epoch": 29.0, "eval_accuracy": 0.9990079254339455, "eval_f1": 0.9779304722529552, "eval_loss": 0.003256614087149501, "eval_precision": 0.9726630371829028, "eval_recall": 0.9832552693208431, "eval_runtime": 24.5523, "eval_samples_per_second": 92.741, "eval_steps_per_second": 5.824, "step": 4147 }, { "epoch": 30.0, "grad_norm": 0.8235336542129517, "learning_rate": 0.0, "loss": 0.0038, "step": 4290 }, { "epoch": 30.0, "eval_accuracy": 0.9990121114025785, "eval_f1": 0.977371880635216, "eval_loss": 0.0032377191819250584, "eval_precision": 0.9711016067506647, "eval_recall": 0.9837236533957845, "eval_runtime": 24.4476, "eval_samples_per_second": 93.138, "eval_steps_per_second": 5.849, "step": 4290 } ], "logging_steps": 500, "max_steps": 4290, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.785257029315584e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }