nm-testing/Llama-3.1-8B-Instruct-KV-FP8-Tensor
8B
•
Updated
•
2
nm-testing/NVIDIA-Nemotron-Nano-9B-v2-quantized.w4a16
2B
•
Updated
•
7
nm-testing/Qwen3-VL-8B-Instruct-W4A16
3B
•
Updated
•
86
nm-testing/Qwen3-VL-8B-Instruct-NVFP4
6B
•
Updated
•
206
•
1
nm-testing/Qwen3-VL-4B-Instruct-NVFP4
3B
•
Updated
•
26
•
1
nm-testing/Llama-3.1-8B-Instruct-NVFP4-mse
5B
•
Updated
•
3
nm-testing/Llama-3.1-8B-Instruct-NVFP4-static_minmax
5B
•
Updated
•
3
nm-testing/EAGLE3-LLaMA3.1-Instruct-8B-sgl
nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized-w4a16-sgl
Updated
nm-testing/Llama-3.2-1B-Instruct-attention-fp8-head
1B
•
Updated
•
4
nm-testing/SpeculatorLlama3-1-8B-Eagle3-sgl
Updated
nm-testing/Mockup-qwen235-eagle3-fp16-sgl
Updated
nm-testing/Speculator-Qwen3-8B-Eagle3-sgl
Updated
nm-testing/Qwen3-VL-235B-A22B-Instruct-NVFP4
Updated
nm-testing/Mockup-qwen235-eagle3-fp16-speculators-converted
Updated
nm-testing/Llama-3.1-70B-Instruct-FP8-block
Text Generation
•
Updated
nm-testing/Qwen3-235B-A22B-EAGLE3-converted-speculators-lmsys
1B
•
Updated
•
2
nm-testing/Meta-Llama-3-8B-Instruct-attention-fp8
nm-testing/Qwen2.5-VL-7B-Instruct-INT8_dyn_per_token
8B
•
Updated
•
2
nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized-w4a16
1B
•
Updated
•
4.87k
nm-testing/llama-3.3-70b-speculators-eagle3
2B
•
Updated
nm-testing/Apertus-70B-Instruct-2509-quantized.w8a8.damp01.sq08
71B
•
Updated
•
3
nm-testing/Qwen3-30B-A3B-NVFP4-working
17B
•
Updated
nm-testing/Meta-Llama-3-8B-Instruct-selfattn-w8a8-mlp-w4a16-sequential
3B
•
Updated
nm-testing/for_testing_gptoss20b_spec_eagle3
0.8B
•
Updated
•
1
nm-testing/Llama-4-Maverick-17B-128E-Instruct-FP8-BLOCK
401B
•
Updated
•
1
nm-testing/Apertus-70B-Instruct-2509-NVFP4
41B
•
Updated
nm-testing/Apertus-8B-Instruct-2509-NVFP4
5B
•
Updated
•
3
nm-testing/Llama-4-Scout-17B-16E-Instruct-FP8-BLOCK
108B
•
Updated
nm-testing/tinysmokellama-3.2
354k
•
Updated
•
51.3k