{ "benchmarks": { "gsm8k": { "name": "Grade School Math 8K", "dataset": "openai/gsm8k", "lower_is_better": false, "models": [ { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 99.6, "date": "2026-04-27" }, { "model_id": "meta-llama/Llama-3.1-405B", "short_name": "Llama-3.1-405B", "provider": "meta-llama", "score": 96.8, "date": "2024-07-16" }, { "model_id": "deepseek-ai/DeepSeek-V3", "short_name": "DeepSeek-V3", "provider": "deepseek-ai", "score": 94.31, "date": "2024-12-25" }, { "model_id": "ibm-granite/granite-4.1-30b", "short_name": "granite-4.1-30b", "provider": "ibm-granite", "score": 94.16, "date": "2026-04-06" }, { "model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct", "short_name": "Llama-3.2-90B-Vision-Instruct", "provider": "meta-llama", "score": 93.1, "date": "2024-09-19" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 92.6, "date": "2026-04-22" }, { "model_id": "ibm-granite/granite-4.1-8b", "short_name": "granite-4.1-8b", "provider": "ibm-granite", "score": 92.49, "date": "2026-04-06" }, { "model_id": "microsoft/Phi-3-medium-4k-instruct", "short_name": "Phi-3-medium-4k-instruct", "provider": "microsoft", "score": 91.0, "date": "2024-05-07" }, { "model_id": "Qwen/Qwen2-72B", "short_name": "Qwen2-72B", "provider": "Qwen", "score": 89.5, "date": "2024-05-22" }, { "model_id": "ibm-granite/granite-4.1-3b", "short_name": "granite-4.1-3b", "provider": "ibm-granite", "score": 86.88, "date": "2026-04-06" }, { "model_id": "microsoft/Phi-3.5-mini-instruct", "short_name": "Phi-3.5-mini-instruct", "provider": "microsoft", "score": 86.2, "date": "2024-08-16" }, { "model_id": "internlm/internlm2_5-7b-chat", "short_name": "internlm2_5-7b-chat", "provider": "internlm", "score": 86.0, "date": "2024-06-27" }, { "model_id": "microsoft/Phi-3-mini-4k-instruct", "short_name": "Phi-3-mini-4k-instruct", "provider": "microsoft", "score": 85.7, "date": "2024-04-22" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Base-Pretrain", "short_name": "Mellum2-12B-A2.5B-Base-Pretrain", "provider": "JetBrains", "score": 81.73, "date": "2026-04-07" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Base", "short_name": "Mellum2-12B-A2.5B-Base", "provider": "JetBrains", "score": 81.73, "date": "2026-05-26" }, { "model_id": "Qwen/Qwen2-7B", "short_name": "Qwen2-7B", "provider": "Qwen", "score": 79.9, "date": "2024-06-04" }, { "model_id": "internlm/internlm2-chat-20b", "short_name": "internlm2-chat-20b", "provider": "internlm", "score": 79.6, "date": "2024-01-10" }, { "model_id": "deepseek-ai/DeepSeek-V2", "short_name": "DeepSeek-V2", "provider": "deepseek-ai", "score": 79.2, "date": "2024-04-22" } ] }, "hle": { "name": "HLE", "dataset": "cais/hle", "lower_is_better": false, "models": [ { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 54.0, "date": "2026-04-14" }, { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 52.3, "date": "2026-04-03" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 50.4, "date": "2026-02-11" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 50.2, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 48.5, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 48.3, "date": "2026-02-16" }, { "model_id": "stepfun-ai/Step-3.7-Flash", "short_name": "Step-3.7-Flash", "provider": "stepfun-ai", "score": 48.1, "date": "2026-05-23" }, { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 48.0, "date": "2026-04-27" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 47.5, "date": "2026-02-24" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 44.9, "date": "2025-11-04" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 42.8, "date": "2025-12-22" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 37.7, "date": "2026-04-22" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "short_name": "NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "provider": "nvidia", "score": 37.4, "date": "2026-06-03" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4", "short_name": "NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4", "provider": "nvidia", "score": 37.4, "date": "2026-06-03" }, { "model_id": "deepseek-ai/DeepSeek-V4-Flash", "short_name": "DeepSeek-V4-Flash", "provider": "deepseek-ai", "score": 34.8, "date": "2026-04-22" }, { "model_id": "PolarSeeker/OpenSeeker-v2-30B-SFT", "short_name": "OpenSeeker-v2-30B-SFT", "provider": "PolarSeeker", "score": 34.6, "date": "2026-05-05" }, { "model_id": "tencent/Hy3-preview", "short_name": "Hy3-preview", "provider": "tencent", "score": 30.0, "date": "2026-04-13" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 24.0, "date": "2026-04-21" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 23.1, "date": "2026-02-01" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 22.82, "date": "2026-03-10" }, { "model_id": "RedHatAI/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "RedHatAI", "score": 22.82, "date": "2026-03-26" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 22.2, "date": "2025-12-20" }, { "model_id": "XiaomiMiMo/MiMo-V2-Flash", "short_name": "MiMo-V2-Flash", "provider": "XiaomiMiMo", "score": 22.1, "date": "2025-12-16" }, { "model_id": "internlm/Intern-S2-Preview", "short_name": "Intern-S2-Preview", "provider": "internlm", "score": 21.94, "date": "2026-05-15" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 21.4, "date": "2026-04-15" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 19.4, "date": "2026-02-12" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 19.0, "date": "2025-08-04" }, { "model_id": "openai/gpt-oss-20b", "short_name": "gpt-oss-20b", "provider": "openai", "score": 17.3, "date": "2025-08-04" }, { "model_id": "zai-org/GLM-4.7-Flash", "short_name": "GLM-4.7-Flash", "provider": "zai-org", "score": 14.4, "date": "2026-01-19" }, { "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", "short_name": "K-EXAONE-236B-A23B", "provider": "LGAI-EXAONE", "score": 13.6, "date": "2025-12-26" }, { "model_id": "MiniMaxAI/MiniMax-M2", "short_name": "MiniMax-M2", "provider": "MiniMaxAI", "score": 12.5, "date": "2025-10-22" }, { "model_id": "HelpingAI/Dhanishtha-2.0-0126", "short_name": "Dhanishtha-2.0-0126", "provider": "HelpingAI", "score": 9.92, "date": "2026-01-01" }, { "model_id": "zai-org/GLM-4.5", "short_name": "GLM-4.5", "provider": "zai-org", "score": 8.32, "date": "2025-07-20" }, { "model_id": "zai-org/GLM-4.5-Air", "short_name": "GLM-4.5-Air", "provider": "zai-org", "score": 8.12, "date": "2025-07-20" } ] }, "chi_bench": { "name": "\u03c7-Bench", "dataset": "actava/chi-bench", "lower_is_better": false, "models": [ { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 18.7, "date": "2026-04-03" }, { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 15.6, "date": "2026-04-14" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 14.2, "date": "2026-04-22" } ] }, "WBench": { "name": "WBench", "dataset": "meituan-longcat/WBench", "lower_is_better": false, "models": [ { "model_id": "robbyant/lingbot-world-fast", "short_name": "lingbot-world-fast", "provider": "robbyant", "score": 78.8, "date": "2026-04-01" }, { "model_id": "tencent/HY-WorldPlay", "short_name": "HY-WorldPlay", "provider": "tencent", "score": 78.4, "date": "2025-12-12" }, { "model_id": "tencent/HunyuanVideo-1.5", "short_name": "HunyuanVideo-1.5", "provider": "tencent", "score": 78.2, "date": "2025-11-18" }, { "model_id": "Lightricks/LTX-2.3", "short_name": "LTX-2.3", "provider": "Lightricks", "score": 74.4, "date": "2026-03-04" }, { "model_id": "inspatio/world", "short_name": "world", "provider": "inspatio", "score": 74.3, "date": "2026-03-16" }, { "model_id": "meituan-longcat/LongCat-Video", "short_name": "LongCat-Video", "provider": "meituan-longcat", "score": 73.7, "date": "2025-10-24" }, { "model_id": "tencent/Hunyuan-GameCraft-1.0", "short_name": "Hunyuan-GameCraft-1.0", "provider": "tencent", "score": 68.5, "date": "2025-08-13" } ] }, "gpqa": { "name": "GPQA Diamond", "dataset": "Idavidrein/gpqa", "lower_is_better": false, "models": [ { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 90.5, "date": "2026-04-14" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 90.1, "date": "2026-04-22" }, { "model_id": "FINAL-Bench/Darwin-28B-REASON", "short_name": "Darwin-28B-REASON", "provider": "FINAL-Bench", "score": 89.39, "date": "2026-05-17" }, { "model_id": "FINAL-Bench/Darwin-28B-Opus", "short_name": "Darwin-28B-Opus", "provider": "FINAL-Bench", "score": 88.89, "date": "2026-04-24" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 88.4, "date": "2026-02-16" }, { "model_id": "FINAL-Bench/Darwin-36B-Opus", "short_name": "Darwin-36B-Opus", "provider": "FINAL-Bench", "score": 88.4, "date": "2026-04-22" }, { "model_id": "FINAL-Bench/Darwin-60B-DUO", "short_name": "Darwin-60B-DUO", "provider": "FINAL-Bench", "score": 88.38, "date": "2026-05-27" }, { "model_id": "inclusionAI/Ring-2.6-1T", "short_name": "Ring-2.6-1T", "provider": "inclusionAI", "score": 88.27, "date": "2026-05-14" }, { "model_id": "deepseek-ai/DeepSeek-V4-Flash", "short_name": "DeepSeek-V4-Flash", "provider": "deepseek-ai", "score": 88.1, "date": "2026-04-22" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4", "short_name": "NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4", "provider": "nvidia", "score": 87.9, "date": "2026-06-03" }, { "model_id": "zai-org/GLM-4.7-FP8", "short_name": "GLM-4.7-FP8", "provider": "zai-org", "score": 87.88, "date": "2025-12-22" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 87.8, "date": "2026-04-21" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 87.6, "date": "2026-01-01" }, { "model_id": "tencent/Hy3-preview", "short_name": "Hy3-preview", "provider": "tencent", "score": 87.2, "date": "2026-04-13" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "short_name": "NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "provider": "nvidia", "score": 87.0, "date": "2026-06-03" }, { "model_id": "FINAL-Bench/Darwin-27B-Opus", "short_name": "Darwin-27B-Opus", "provider": "FINAL-Bench", "score": 86.9, "date": "2026-04-12" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 86.6, "date": "2026-02-24" }, { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 86.2, "date": "2026-04-03" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 86.0, "date": "2026-02-11" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 86.0, "date": "2026-04-15" }, { "model_id": "FINAL-Bench/Darwin-31B-Opus", "short_name": "Darwin-31B-Opus", "provider": "FINAL-Bench", "score": 85.9, "date": "2026-04-06" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 85.86, "date": "2026-02-12" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 85.7, "date": "2025-12-22" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 85.5, "date": "2026-02-24" }, { "model_id": "zai-org/GLM-5-FP8", "short_name": "GLM-5-FP8", "provider": "zai-org", "score": 85.35, "date": "2026-02-11" }, { "model_id": "FINAL-Bench/Darwin-4B-David", "short_name": "Darwin-4B-David", "provider": "FINAL-Bench", "score": 85.0, "date": "2026-04-10" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 84.5, "date": "2025-11-04" }, { "model_id": "FINAL-Bench/Darwin-9B-NEG", "short_name": "Darwin-9B-NEG", "provider": "FINAL-Bench", "score": 84.34, "date": "2026-04-24" }, { "model_id": "JGOS-Model/JGOS-31B-Citizen", "short_name": "JGOS-31B-Citizen", "provider": "JGOS-Model", "score": 84.34, "date": "2026-06-04" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 83.5, "date": "2026-02-01" }, { "model_id": "XiaomiMiMo/MiMo-V2-Flash", "short_name": "MiMo-V2-Flash", "provider": "XiaomiMiMo", "score": 83.33, "date": "2025-12-16" }, { "model_id": "zai-org/GLM-5.1-FP8", "short_name": "GLM-5.1-FP8", "provider": "zai-org", "score": 83.33, "date": "2026-04-03" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 82.7, "date": "2026-03-10" }, { "model_id": "RedHatAI/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "RedHatAI", "score": 82.7, "date": "2026-03-26" }, { "model_id": "FINAL-Bench/Darwin-9B-Opus", "short_name": "Darwin-9B-Opus", "provider": "FINAL-Bench", "score": 82.5, "date": "2026-04-04" }, { "model_id": "zai-org/GLM-4.6-FP8", "short_name": "GLM-4.6-FP8", "provider": "zai-org", "score": 81.31, "date": "2025-09-29" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 80.9, "date": "2025-08-04" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 80.81, "date": "2025-12-20" }, { "model_id": "meituan-longcat/LongCat-Flash-Thinking-2601", "short_name": "LongCat-Flash-Thinking-2601", "provider": "meituan-longcat", "score": 80.5, "date": "2026-01-14" }, { "model_id": "LGAI-EXAONE/EXAONE-4.5-33B", "short_name": "EXAONE-4.5-33B", "provider": "LGAI-EXAONE", "score": 80.5, "date": "2026-04-04" }, { "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", "short_name": "K-EXAONE-236B-A23B", "provider": "LGAI-EXAONE", "score": 79.1, "date": "2025-12-26" }, { "model_id": "deepseek-ai/DeepSeek-R1-0528", "short_name": "DeepSeek-R1-0528", "provider": "deepseek-ai", "score": 78.79, "date": "2025-05-28" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-FP8", "provider": "nvidia", "score": 77.27, "date": "2026-03-10" }, { "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", "short_name": "Nemotron-Cascade-2-30B-A3B", "provider": "nvidia", "score": 76.1, "date": "2026-03-18" }, { "model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "short_name": "Qwen3-Next-80B-A3B-Thinking", "provider": "Qwen", "score": 75.76, "date": "2025-09-09" }, { "model_id": "zai-org/GLM-4.7-Flash", "short_name": "GLM-4.7-Flash", "provider": "zai-org", "score": 75.25, "date": "2026-01-19" }, { "model_id": "jdopensource/JoyAI-LLM-Flash", "short_name": "JoyAI-LLM-Flash", "provider": "jdopensource", "score": 74.43, "date": "2026-02-14" }, { "model_id": "openai/gpt-oss-20b", "short_name": "gpt-oss-20b", "provider": "openai", "score": 74.2, "date": "2025-08-04" }, { "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "short_name": "Qwen3-30B-A3B-Thinking-2507", "provider": "Qwen", "score": 72.22, "date": "2025-07-29" }, { "model_id": "zai-org/GLM-4.5-Air", "short_name": "GLM-4.5-Air", "provider": "zai-org", "score": 71.72, "date": "2025-07-20" }, { "model_id": "deepseek-ai/DeepSeek-R1", "short_name": "DeepSeek-R1", "provider": "deepseek-ai", "score": 71.5, "date": "2025-01-20" }, { "model_id": "mistralai/Mistral-Small-4-119B-2603", "short_name": "Mistral-Small-4-119B-2603", "provider": "mistralai", "score": 71.2, "date": "2026-01-23" }, { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 66.7, "date": "2026-04-27" }, { "model_id": "Qwen/Qwen3-4B-Thinking-2507", "short_name": "Qwen3-4B-Thinking-2507", "provider": "Qwen", "score": 65.8, "date": "2025-08-05" }, { "model_id": "Qwen/Qwen3-4B-Instruct-2507", "short_name": "Qwen3-4B-Instruct-2507", "provider": "Qwen", "score": 62.0, "date": "2025-08-05" }, { "model_id": "deepseek-ai/DeepSeek-V3", "short_name": "DeepSeek-V3", "provider": "deepseek-ai", "score": 58.21, "date": "2024-12-25" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Thinking", "short_name": "Mellum2-12B-A2.5B-Thinking", "provider": "JetBrains", "score": 57.6, "date": "2026-05-26" }, { "model_id": "CohereLabs/c4ai-command-a-03-2025", "short_name": "c4ai-command-a-03-2025", "provider": "CohereLabs", "score": 50.51, "date": "2025-03-11" }, { "model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct", "short_name": "Llama-3.2-90B-Vision-Instruct", "provider": "meta-llama", "score": 46.09, "date": "2024-09-19" }, { "model_id": "ibm-granite/granite-4.1-30b", "short_name": "granite-4.1-30b", "provider": "ibm-granite", "score": 45.76, "date": "2026-04-06" }, { "model_id": "ibm-granite/granite-4.1-8b", "short_name": "granite-4.1-8b", "provider": "ibm-granite", "score": 41.96, "date": "2026-04-06" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Instruct", "short_name": "Mellum2-12B-A2.5B-Instruct", "provider": "JetBrains", "score": 40.9, "date": "2026-05-26" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Thinking-SFT", "short_name": "Mellum2-12B-A2.5B-Thinking-SFT", "provider": "JetBrains", "score": 39.9, "date": "2026-05-26" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Instruct-SFT", "short_name": "Mellum2-12B-A2.5B-Instruct-SFT", "provider": "JetBrains", "score": 38.9, "date": "2026-05-26" }, { "model_id": "CohereLabs/c4ai-command-r-plus-08-2024", "short_name": "c4ai-command-r-plus-08-2024", "provider": "CohereLabs", "score": 34.34, "date": "2024-08-21" }, { "model_id": "ibm-granite/granite-4.1-3b", "short_name": "granite-4.1-3b", "provider": "ibm-granite", "score": 31.7, "date": "2026-04-06" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Base-Pretrain", "short_name": "Mellum2-12B-A2.5B-Base-Pretrain", "provider": "JetBrains", "score": 31.31, "date": "2026-04-07" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Base", "short_name": "Mellum2-12B-A2.5B-Base", "provider": "JetBrains", "score": 31.31, "date": "2026-05-26" }, { "model_id": "CohereLabs/c4ai-command-r-08-2024", "short_name": "c4ai-command-r-08-2024", "provider": "CohereLabs", "score": 26.77, "date": "2024-08-19" }, { "model_id": "CohereLabs/c4ai-command-r7b-12-2024", "short_name": "c4ai-command-r7b-12-2024", "provider": "CohereLabs", "score": 26.77, "date": "2024-12-11" }, { "model_id": "Muse-research/Muse-2-350M", "short_name": "Muse-2-350M", "provider": "Muse-research", "score": 25.79, "date": "2026-06-06" }, { "model_id": "meta-llama/Llama-3.2-1B-Instruct", "short_name": "Llama-3.2-1B-Instruct", "provider": "meta-llama", "score": 18.69, "date": "2024-09-18" } ] }, "olmOcr": { "name": "olmOCR-bench", "dataset": "allenai/olmOCR-bench", "lower_is_better": false, "models": [ { "model_id": "infly/Infinity-Parser2-Pro", "short_name": "Infinity-Parser2-Pro", "provider": "infly", "score": 87.6, "date": "2026-04-08" }, { "model_id": "datalab-to/chandra-ocr-2", "short_name": "chandra-ocr-2", "provider": "datalab-to", "score": 85.9, "date": "2026-03-16" }, { "model_id": "rednote-hilab/dots.mocr", "short_name": "dots.mocr", "provider": "rednote-hilab", "score": 83.9, "date": "2026-03-19" }, { "model_id": "datalab-to/surya-ocr-2", "short_name": "surya-ocr-2", "provider": "datalab-to", "score": 83.3, "date": "2026-05-14" }, { "model_id": "lightonai/LightOnOCR-2-1B", "short_name": "LightOnOCR-2-1B", "provider": "lightonai", "score": 83.2, "date": "2026-01-16" }, { "model_id": "datalab-to/chandra", "short_name": "chandra", "provider": "datalab-to", "score": 83.1, "date": "2025-10-21" }, { "model_id": "infly/Infinity-Parser-7B", "short_name": "Infinity-Parser-7B", "provider": "infly", "score": 82.5, "date": "2025-10-17" }, { "model_id": "tiiuae/Falcon-OCR", "short_name": "Falcon-OCR", "provider": "tiiuae", "score": 80.3, "date": "2026-02-22" }, { "model_id": "baidu/Qianfan-OCR", "short_name": "Qianfan-OCR", "provider": "baidu", "score": 79.8, "date": "2026-03-18" }, { "model_id": "rednote-hilab/dots.ocr", "short_name": "dots.ocr", "provider": "rednote-hilab", "score": 79.1, "date": "2025-07-30" }, { "model_id": "deepseek-ai/DeepSeek-OCR-2", "short_name": "DeepSeek-OCR-2", "provider": "deepseek-ai", "score": 76.3, "date": "2026-01-27" }, { "model_id": "lightonai/LightOnOCR-1B-1025", "short_name": "LightOnOCR-1B-1025", "provider": "lightonai", "score": 76.1, "date": "2025-10-20" }, { "model_id": "deepseek-ai/DeepSeek-OCR", "short_name": "DeepSeek-OCR", "provider": "deepseek-ai", "score": 75.7, "date": "2025-10-17" }, { "model_id": "opendatalab/MinerU2.5-2509-1.2B", "short_name": "MinerU2.5-2509-1.2B", "provider": "opendatalab", "score": 75.2, "date": "2025-09-17" }, { "model_id": "zai-org/GLM-OCR", "short_name": "GLM-OCR", "provider": "zai-org", "score": 75.2, "date": "2026-01-30" } ] }, "mmluPro": { "name": "MMLU-Pro", "dataset": "TIGER-Lab/MMLU-Pro", "lower_is_better": false, "models": [ { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 88.0, "date": "2025-12-20" }, { "model_id": "internlm/Intern-S2-Preview", "short_name": "Intern-S2-Preview", "provider": "internlm", "score": 88.0, "date": "2026-05-15" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 87.8, "date": "2026-02-16" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 87.5, "date": "2026-04-22" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 87.1, "date": "2026-01-01" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "short_name": "NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "provider": "nvidia", "score": 86.8, "date": "2026-06-03" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4", "short_name": "NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4", "provider": "nvidia", "score": 86.8, "date": "2026-06-03" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 86.7, "date": "2026-02-24" }, { "model_id": "deepseek-ai/DeepSeek-V4-Flash", "short_name": "DeepSeek-V4-Flash", "provider": "deepseek-ai", "score": 86.4, "date": "2026-04-22" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 86.2, "date": "2026-04-21" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 86.1, "date": "2026-02-24" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 86.0, "date": "2026-02-11" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 85.2, "date": "2026-04-15" }, { "model_id": "deepseek-ai/DeepSeek-R1-0528", "short_name": "DeepSeek-R1-0528", "provider": "deepseek-ai", "score": 85.0, "date": "2025-05-28" }, { "model_id": "zai-org/GLM-4.5", "short_name": "GLM-4.5", "provider": "zai-org", "score": 84.6, "date": "2025-07-20" }, { "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "short_name": "Qwen3-235B-A22B-Thinking-2507", "provider": "Qwen", "score": 84.5, "date": "2025-07-25" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 84.4, "date": "2026-02-01" }, { "model_id": "deepseek-ai/DeepSeek-R1", "short_name": "DeepSeek-R1", "provider": "deepseek-ai", "score": 84.0, "date": "2025-01-20" }, { "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", "short_name": "K-EXAONE-236B-A23B", "provider": "LGAI-EXAONE", "score": 83.8, "date": "2025-12-26" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 83.73, "date": "2026-03-10" }, { "model_id": "RedHatAI/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "RedHatAI", "score": 83.73, "date": "2026-03-26" }, { "model_id": "internlm/Intern-S1", "short_name": "Intern-S1", "provider": "internlm", "score": 83.5, "date": "2025-07-24" }, { "model_id": "LGAI-EXAONE/EXAONE-4.5-33B", "short_name": "EXAONE-4.5-33B", "provider": "LGAI-EXAONE", "score": 83.3, "date": "2026-04-04" }, { "model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "short_name": "Qwen3-235B-A22B-Instruct-2507", "provider": "Qwen", "score": 83.0, "date": "2025-07-21" }, { "model_id": "ByteDance-Seed/Seed-OSS-36B-Instruct", "short_name": "Seed-OSS-36B-Instruct", "provider": "ByteDance-Seed", "score": 82.7, "date": "2025-08-20" }, { "model_id": "meituan-longcat/LongCat-Flash-Chat", "short_name": "LongCat-Flash-Chat", "provider": "meituan-longcat", "score": 82.7, "date": "2025-08-29" }, { "model_id": "MiniMaxAI/MiniMax-M2", "short_name": "MiniMax-M2", "provider": "MiniMaxAI", "score": 82.0, "date": "2025-10-22" }, { "model_id": "zai-org/GLM-4.5-Air", "short_name": "GLM-4.5-Air", "provider": "zai-org", "score": 81.4, "date": "2025-07-20" }, { "model_id": "deepseek-ai/DeepSeek-V3-0324", "short_name": "DeepSeek-V3-0324", "provider": "deepseek-ai", "score": 81.3, "date": "2025-03-24" }, { "model_id": "MiniMaxAI/MiniMax-M1-40k", "short_name": "MiniMax-M1-40k", "provider": "MiniMaxAI", "score": 81.1, "date": "2025-06-05" }, { "model_id": "jdopensource/JoyAI-LLM-Flash", "short_name": "JoyAI-LLM-Flash", "provider": "jdopensource", "score": 81.02, "date": "2026-02-14" }, { "model_id": "moonshotai/Kimi-K2-Instruct", "short_name": "Kimi-K2-Instruct", "provider": "moonshotai", "score": 81.0, "date": "2025-07-11" }, { "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "short_name": "Qwen3-30B-A3B-Thinking-2507", "provider": "Qwen", "score": 80.9, "date": "2025-07-29" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 80.8, "date": "2025-08-04" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 80.1, "date": "2026-02-12" }, { "model_id": "baidu/ERNIE-4.5-300B-A47B-PT", "short_name": "ERNIE-4.5-300B-A47B-PT", "provider": "baidu", "score": 78.4, "date": "2025-06-28" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "provider": "nvidia", "score": 78.3, "date": "2025-12-04" }, { "model_id": "meituan-longcat/LongCat-Flash-Lite", "short_name": "LongCat-Flash-Lite", "provider": "meituan-longcat", "score": 78.29, "date": "2026-01-27" }, { "model_id": "deepseek-ai/DeepSeek-V3", "short_name": "DeepSeek-V3", "provider": "deepseek-ai", "score": 75.87, "date": "2024-12-25" }, { "model_id": "MiniMaxAI/MiniMax-Text-01", "short_name": "MiniMax-Text-01", "provider": "MiniMaxAI", "score": 75.7, "date": "2025-01-12" }, { "model_id": "openai/gpt-oss-20b", "short_name": "gpt-oss-20b", "provider": "openai", "score": 73.6, "date": "2025-08-04" }, { "model_id": "Qwen/Qwen2.5-72B", "short_name": "Qwen2.5-72B", "provider": "Qwen", "score": 71.59, "date": "2024-09-15" }, { "model_id": "microsoft/phi-4", "short_name": "phi-4", "provider": "microsoft", "score": 70.4, "date": "2024-12-11" }, { "model_id": "Qwen/Qwen3-4B-Instruct-2507", "short_name": "Qwen3-4B-Instruct-2507", "provider": "Qwen", "score": 69.6, "date": "2025-08-05" }, { "model_id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "short_name": "ERNIE-4.5-300B-A47B-Base-PT", "provider": "baidu", "score": 69.5, "date": "2025-06-28" }, { "model_id": "Qwen/Qwen2.5-32B", "short_name": "Qwen2.5-32B", "provider": "Qwen", "score": 69.23, "date": "2024-09-15" }, { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 68.5, "date": "2026-04-27" }, { "model_id": "Qwen/Qwen3-235B-A22B", "short_name": "Qwen3-235B-A22B", "provider": "Qwen", "score": 68.18, "date": "2025-04-27" }, { "model_id": "mistralai/Mistral-Large-Instruct-2411", "short_name": "Mistral-Large-Instruct-2411", "provider": "mistralai", "score": 67.94, "date": "2024-11-14" }, { "model_id": "tencent/Hunyuan-A13B-Instruct", "short_name": "Hunyuan-A13B-Instruct", "provider": "tencent", "score": 67.3, "date": "2025-06-25" }, { "model_id": "mistralai/Mistral-Large-Instruct-2407", "short_name": "Mistral-Large-Instruct-2407", "provider": "mistralai", "score": 65.91, "date": "2024-07-24" }, { "model_id": "deepseek-ai/DeepSeek-V2.5", "short_name": "DeepSeek-V2.5", "provider": "deepseek-ai", "score": 65.83, "date": "2024-09-05" }, { "model_id": "ByteDance-Seed/Seed-OSS-36B-Base", "short_name": "Seed-OSS-36B-Base", "provider": "ByteDance-Seed", "score": 65.1, "date": "2025-08-20" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16", "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16", "provider": "nvidia", "score": 65.1, "date": "2025-12-03" }, { "model_id": "ibm-granite/granite-4.1-30b", "short_name": "granite-4.1-30b", "provider": "ibm-granite", "score": 64.09, "date": "2026-04-06" }, { "model_id": "Qwen/Qwen2.5-14B", "short_name": "Qwen2.5-14B", "provider": "Qwen", "score": 63.69, "date": "2024-09-15" }, { "model_id": "Qwen/Qwen3-30B-A3B-Base", "short_name": "Qwen3-30B-A3B-Base", "provider": "Qwen", "score": 61.7, "date": "2025-04-28" }, { "model_id": "meta-llama/Llama-3.1-405B", "short_name": "Llama-3.1-405B", "provider": "meta-llama", "score": 61.6, "date": "2024-07-16" }, { "model_id": "nvidia/Nemotron-H-56B-Base-8K", "short_name": "Nemotron-H-56B-Base-8K", "provider": "nvidia", "score": 60.5, "date": "2025-04-08" }, { "model_id": "ByteDance-Seed/Seed-OSS-36B-Base-woSyn", "short_name": "Seed-OSS-36B-Base-woSyn", "provider": "ByteDance-Seed", "score": 60.4, "date": "2025-08-20" }, { "model_id": "tencent/Tencent-Hunyuan-Large", "short_name": "Tencent-Hunyuan-Large", "provider": "tencent", "score": 60.2, "date": "2024-10-22" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Base-Pretrain", "short_name": "Mellum2-12B-A2.5B-Base-Pretrain", "provider": "JetBrains", "score": 59.31, "date": "2026-04-07" }, { "model_id": "JetBrains/Mellum2-12B-A2.5B-Base", "short_name": "Mellum2-12B-A2.5B-Base", "provider": "JetBrains", "score": 59.31, "date": "2026-05-26" }, { "model_id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", "short_name": "EXAONE-3.5-32B-Instruct", "provider": "LGAI-EXAONE", "score": 58.91, "date": "2024-12-01" }, { "model_id": "XiaomiMiMo/MiMo-7B-RL", "short_name": "MiMo-7B-RL", "provider": "XiaomiMiMo", "score": 58.6, "date": "2025-04-29" }, { "model_id": "internlm/internlm3-8b-instruct", "short_name": "internlm3-8b-instruct", "provider": "internlm", "score": 57.6, "date": "2025-01-13" }, { "model_id": "baidu/ERNIE-4.5-21B-A3B-Base-PT", "short_name": "ERNIE-4.5-21B-A3B-Base-PT", "provider": "baidu", "score": 56.7, "date": "2025-06-28" }, { "model_id": "ibm-granite/granite-4.1-8b", "short_name": "granite-4.1-8b", "provider": "ibm-granite", "score": 55.99, "date": "2026-04-06" }, { "model_id": "microsoft/Phi-3-medium-4k-instruct", "short_name": "Phi-3-medium-4k-instruct", "provider": "microsoft", "score": 55.7, "date": "2024-05-07" }, { "model_id": "deepseek-ai/DeepSeek-V2-Chat", "short_name": "DeepSeek-V2-Chat", "provider": "deepseek-ai", "score": 54.81, "date": "2024-04-28" }, { "model_id": "mistralai/Mistral-Small-24B-Base-2501", "short_name": "Mistral-Small-24B-Base-2501", "provider": "mistralai", "score": 54.4, "date": "2025-01-23" }, { "model_id": "microsoft/Phi-4-mini-instruct", "short_name": "Phi-4-mini-instruct", "provider": "microsoft", "score": 52.8, "date": "2025-02-19" }, { "model_id": "meta-llama/Meta-Llama-3-70B", "short_name": "Meta-Llama-3-70B", "provider": "meta-llama", "score": 52.78, "date": "2024-04-17" }, { "model_id": "meta-llama/Llama-3.1-70B", "short_name": "Llama-3.1-70B", "provider": "meta-llama", "score": 52.47, "date": "2024-07-14" }, { "model_id": "01-ai/Yi-1.5-34B-Chat", "short_name": "Yi-1.5-34B-Chat", "provider": "01-ai", "score": 52.29, "date": "2024-05-10" }, { "model_id": "microsoft/Phi-3-medium-128k-instruct", "short_name": "Phi-3-medium-128k-instruct", "provider": "microsoft", "score": 51.91, "date": "2024-05-07" }, { "model_id": "TIGER-Lab/MAmmoTH2-8x7B-Plus", "short_name": "MAmmoTH2-8x7B-Plus", "provider": "TIGER-Lab", "score": 50.4, "date": "2024-05-06" }, { "model_id": "Qwen/Qwen1.5-110B", "short_name": "Qwen1.5-110B", "provider": "Qwen", "score": 49.93, "date": "2024-04-25" }, { "model_id": "ibm-granite/granite-4.1-3b", "short_name": "granite-4.1-3b", "provider": "ibm-granite", "score": 49.83, "date": "2026-04-06" }, { "model_id": "ai21labs/AI21-Jamba-Large-1.5", "short_name": "AI21-Jamba-Large-1.5", "provider": "ai21labs", "score": 49.46, "date": "2024-08-19" }, { "model_id": "mistralai/Mistral-Small-Instruct-2409", "short_name": "Mistral-Small-Instruct-2409", "provider": "mistralai", "score": 48.4, "date": "2024-09-17" }, { "model_id": "zai-org/glm-4-9b", "short_name": "glm-4-9b", "provider": "zai-org", "score": 47.92, "date": "2024-06-04" }, { "model_id": "microsoft/Phi-3.5-mini-instruct", "short_name": "Phi-3.5-mini-instruct", "provider": "microsoft", "score": 47.87, "date": "2024-08-16" }, { "model_id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", "short_name": "EXAONE-3.5-7.8B-Instruct", "provider": "LGAI-EXAONE", "score": 46.24, "date": "2024-12-01" }, { "model_id": "01-ai/Yi-1.5-9B-Chat", "short_name": "Yi-1.5-9B-Chat", "provider": "01-ai", "score": 45.95, "date": "2024-05-10" }, { "model_id": "microsoft/Phi-3-mini-4k-instruct", "short_name": "Phi-3-mini-4k-instruct", "provider": "microsoft", "score": 45.66, "date": "2024-04-22" }, { "model_id": "CohereLabs/aya-expanse-32b", "short_name": "aya-expanse-32b", "provider": "CohereLabs", "score": 45.41, "date": "2024-10-23" }, { "model_id": "google/gemma-2-9b", "short_name": "gemma-2-9b", "provider": "google", "score": 45.1, "date": "2024-06-24" }, { "model_id": "Qwen/Qwen2.5-7B", "short_name": "Qwen2.5-7B", "provider": "Qwen", "score": 45.0, "date": "2024-09-15" }, { "model_id": "microsoft/Phi-3-mini-128k-instruct", "short_name": "Phi-3-mini-128k-instruct", "provider": "microsoft", "score": 43.86, "date": "2024-04-22" }, { "model_id": "Qwen/Qwen2.5-3B", "short_name": "Qwen2.5-3B", "provider": "Qwen", "score": 43.73, "date": "2024-09-15" }, { "model_id": "TIGER-Lab/MAmmoTH2-8B-Plus", "short_name": "MAmmoTH2-8B-Plus", "provider": "TIGER-Lab", "score": 43.35, "date": "2024-05-06" }, { "model_id": "01-ai/Yi-34B", "short_name": "Yi-34B", "provider": "01-ai", "score": 43.03, "date": "2023-11-01" }, { "model_id": "mistralai/Mathstral-7B-v0.1", "short_name": "Mathstral-7B-v0.1", "provider": "mistralai", "score": 42.0, "date": "2024-07-16" }, { "model_id": "XiaomiMiMo/MiMo-7B-Base", "short_name": "MiMo-7B-Base", "provider": "XiaomiMiMo", "score": 41.9, "date": "2025-04-29" }, { "model_id": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", "short_name": "DeepSeek-Coder-V2-Lite-Instruct", "provider": "deepseek-ai", "score": 41.57, "date": "2024-06-14" }, { "model_id": "mistralai/Mixtral-8x7B-v0.1", "short_name": "Mixtral-8x7B-v0.1", "provider": "mistralai", "score": 41.03, "date": "2023-12-01" }, { "model_id": "meta-llama/Meta-Llama-3-8B-Instruct", "short_name": "Meta-Llama-3-8B-Instruct", "provider": "meta-llama", "score": 40.98, "date": "2024-04-17" }, { "model_id": "TIGER-Lab/MAmmoTH2-7B-Plus", "short_name": "MAmmoTH2-7B-Plus", "provider": "TIGER-Lab", "score": 40.85, "date": "2024-05-06" }, { "model_id": "Qwen/Qwen2-7B", "short_name": "Qwen2-7B", "provider": "Qwen", "score": 40.73, "date": "2024-06-04" }, { "model_id": "mistralai/Mistral-Nemo-Base-2407", "short_name": "Mistral-Nemo-Base-2407", "provider": "mistralai", "score": 39.77, "date": "2024-07-18" }, { "model_id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", "short_name": "EXAONE-3.5-2.4B-Instruct", "provider": "LGAI-EXAONE", "score": 39.1, "date": "2024-12-01" }, { "model_id": "01-ai/Yi-1.5-6B-Chat", "short_name": "Yi-1.5-6B-Chat", "provider": "01-ai", "score": 38.23, "date": "2024-05-11" }, { "model_id": "Qwen/Qwen1.5-14B-Chat", "short_name": "Qwen1.5-14B-Chat", "provider": "Qwen", "score": 38.02, "date": "2024-01-30" }, { "model_id": "mistralai/Ministral-8B-Instruct-2410", "short_name": "Ministral-8B-Instruct-2410", "provider": "mistralai", "score": 37.93, "date": "2024-10-15" }, { "model_id": "CohereLabs/c4ai-command-r-v01", "short_name": "c4ai-command-r-v01", "provider": "CohereLabs", "score": 37.9, "date": "2024-03-11" }, { "model_id": "internlm/internlm2-math-plus-20b", "short_name": "internlm2-math-plus-20b", "provider": "internlm", "score": 37.1, "date": "2024-05-24" }, { "model_id": "GSAI-ML/LLaDA-8B-Instruct", "short_name": "LLaDA-8B-Instruct", "provider": "GSAI-ML", "score": 37.0, "date": "2025-02-19" }, { "model_id": "abacusai/Llama-3-Smaug-8B", "short_name": "Llama-3-Smaug-8B", "provider": "abacusai", "score": 36.93, "date": "2024-04-19" }, { "model_id": "meta-llama/Llama-3.1-8B", "short_name": "Llama-3.1-8B", "provider": "meta-llama", "score": 36.6, "date": "2024-07-14" }, { "model_id": "meta-llama/Meta-Llama-3-8B", "short_name": "Meta-Llama-3-8B", "provider": "meta-llama", "score": 35.36, "date": "2024-04-17" }, { "model_id": "deepseek-ai/deepseek-math-7b-instruct", "short_name": "deepseek-math-7b-instruct", "provider": "deepseek-ai", "score": 35.3, "date": "2024-02-05" }, { "model_id": "deepseek-ai/DeepSeek-Coder-V2-Lite-Base", "short_name": "DeepSeek-Coder-V2-Lite-Base", "provider": "deepseek-ai", "score": 34.37, "date": "2024-06-14" }, { "model_id": "CohereLabs/aya-expanse-8b", "short_name": "aya-expanse-8b", "provider": "CohereLabs", "score": 33.74, "date": "2024-10-23" }, { "model_id": "google/gemma-7b", "short_name": "gemma-7b", "provider": "google", "score": 33.73, "date": "2024-02-08" }, { "model_id": "internlm/internlm2-math-plus-7b", "short_name": "internlm2-math-plus-7b", "provider": "internlm", "score": 33.5, "date": "2024-05-24" }, { "model_id": "ibm-granite/granite-3.1-8b-base", "short_name": "granite-3.1-8b-base", "provider": "ibm-granite", "score": 33.08, "date": "2024-12-06" }, { "model_id": "Qwen/Qwen2.5-1.5B", "short_name": "Qwen2.5-1.5B", "provider": "Qwen", "score": 32.1, "date": "2024-09-15" }, { "model_id": "ibm-granite/granite-3.0-8b-base", "short_name": "granite-3.0-8b-base", "provider": "ibm-granite", "score": 31.03, "date": "2024-10-02" }, { "model_id": "mistralai/Mistral-7B-Instruct-v0.2", "short_name": "Mistral-7B-Instruct-v0.2", "provider": "mistralai", "score": 30.84, "date": "2023-12-11" }, { "model_id": "mistral-community/Mistral-7B-v0.2", "short_name": "Mistral-7B-v0.2", "provider": "mistral-community", "score": 30.43, "date": "2024-03-23" }, { "model_id": "Qwen/Qwen1.5-7B-Chat", "short_name": "Qwen1.5-7B-Chat", "provider": "Qwen", "score": 29.06, "date": "2024-01-30" }, { "model_id": "01-ai/Yi-6B-Chat", "short_name": "Yi-6B-Chat", "provider": "01-ai", "score": 28.84, "date": "2023-11-22" }, { "model_id": "01-ai/Yi-6B", "short_name": "Yi-6B", "provider": "01-ai", "score": 26.51, "date": "2023-11-01" }, { "model_id": "ibm-granite/granite-3.1-2b-base", "short_name": "granite-3.1-2b-base", "provider": "ibm-granite", "score": 23.89, "date": "2024-12-06" }, { "model_id": "EleutherAI/llemma_7b", "short_name": "llemma_7b", "provider": "EleutherAI", "score": 23.45, "date": "2023-09-12" }, { "model_id": "Qwen/Qwen2-1.5B-Instruct", "short_name": "Qwen2-1.5B-Instruct", "provider": "Qwen", "score": 22.62, "date": "2024-06-03" }, { "model_id": "Qwen/Qwen2-1.5B", "short_name": "Qwen2-1.5B", "provider": "Qwen", "score": 22.56, "date": "2024-05-31" }, { "model_id": "meta-llama/Llama-3.2-3B", "short_name": "Llama-3.2-3B", "provider": "meta-llama", "score": 22.17, "date": "2024-09-18" }, { "model_id": "ibm-granite/granite-3.0-2b-base", "short_name": "granite-3.0-2b-base", "provider": "ibm-granite", "score": 21.72, "date": "2024-10-02" }, { "model_id": "ibm-granite/granite-3.1-3b-a800m-base", "short_name": "granite-3.1-3b-a800m-base", "provider": "ibm-granite", "score": 20.39, "date": "2024-12-06" }, { "model_id": "HuggingFaceTB/SmolLM2-1.7B", "short_name": "SmolLM2-1.7B", "provider": "HuggingFaceTB", "score": 18.31, "date": "2024-10-30" }, { "model_id": "google/gemma-2b", "short_name": "gemma-2b", "provider": "google", "score": 15.85, "date": "2024-02-08" }, { "model_id": "Qwen/Qwen2-0.5B", "short_name": "Qwen2-0.5B", "provider": "Qwen", "score": 14.97, "date": "2024-05-31" }, { "model_id": "Qwen/Qwen2.5-0.5B", "short_name": "Qwen2.5-0.5B", "provider": "Qwen", "score": 14.92, "date": "2024-09-15" }, { "model_id": "ibm-granite/granite-3.1-1b-a400m-base", "short_name": "granite-3.1-1b-a400m-base", "provider": "ibm-granite", "score": 12.34, "date": "2024-12-06" }, { "model_id": "meta-llama/Llama-3.2-1B", "short_name": "Llama-3.2-1B", "provider": "meta-llama", "score": 11.95, "date": "2024-09-18" }, { "model_id": "HuggingFaceTB/SmolLM-1.7B", "short_name": "SmolLM-1.7B", "provider": "HuggingFaceTB", "score": 11.93, "date": "2024-07-14" }, { "model_id": "HuggingFaceTB/SmolLM2-360M", "short_name": "SmolLM2-360M", "provider": "HuggingFaceTB", "score": 11.38, "date": "2024-10-31" }, { "model_id": "HuggingFaceTB/SmolLM-135M", "short_name": "SmolLM-135M", "provider": "HuggingFaceTB", "score": 11.22, "date": "2024-07-14" }, { "model_id": "HuggingFaceTB/SmolLM-360M", "short_name": "SmolLM-360M", "provider": "HuggingFaceTB", "score": 10.95, "date": "2024-07-14" }, { "model_id": "HuggingFaceTB/SmolLM2-135M", "short_name": "SmolLM2-135M", "provider": "HuggingFaceTB", "score": 10.85, "date": "2024-10-31" }, { "model_id": "Qwen/Qwen2.5-VL-72B-Instruct", "short_name": "Qwen2.5-VL-72B-Instruct", "provider": "Qwen", "score": 0.65, "date": "2025-01-27" } ] }, "MMMU_Pro": { "name": "MMMU_Pro", "dataset": "MMMU/MMMU_Pro", "lower_is_better": false, "models": [ { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 79.4, "date": "2026-04-14" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 78.5, "date": "2026-01-01" }, { "model_id": "internlm/Intern-S2-Preview", "short_name": "Intern-S2-Preview", "provider": "internlm", "score": 76.88, "date": "2026-05-15" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 75.8, "date": "2026-04-21" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 75.3, "date": "2026-04-15" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 75.0, "date": "2026-02-24" }, { "model_id": "AIDC-AI/Ovis2.6-80B-A3B", "short_name": "Ovis2.6-80B-A3B", "provider": "AIDC-AI", "score": 66.3, "date": "2026-05-11" }, { "model_id": "Qwen/Qwen2.5-VL-7B-Instruct", "short_name": "Qwen2.5-VL-7B-Instruct", "provider": "Qwen", "score": 34.3, "date": "2025-01-26" }, { "model_id": "Qwen/Qwen2.5-VL-3B-Instruct", "short_name": "Qwen2.5-VL-3B-Instruct", "provider": "Qwen", "score": 32.7, "date": "2025-01-26" } ] }, "sweVerified": { "name": "SWE-bench Verified", "dataset": "SWE-bench/SWE-bench_Verified", "lower_is_better": false, "models": [ { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 80.6, "date": "2026-04-22" }, { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 80.2, "date": "2026-04-14" }, { "model_id": "deepseek-ai/DeepSeek-V4-Flash", "short_name": "DeepSeek-V4-Flash", "provider": "deepseek-ai", "score": 79.0, "date": "2026-04-22" }, { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 78.9, "date": "2026-04-27" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 77.8, "date": "2026-02-11" }, { "model_id": "mistralai/Mistral-Medium-3.5-128B", "short_name": "Mistral-Medium-3.5-128B", "provider": "mistralai", "score": 77.6, "date": "2026-03-31" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 77.2, "date": "2026-04-21" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 76.4, "date": "2026-02-16" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 75.8, "date": "2026-02-12" }, { "model_id": "Multilingual-Multimodal-NLP/IndustrialCoder", "short_name": "IndustrialCoder", "provider": "Multilingual-Multimodal-NLP", "score": 74.8, "date": "2026-03-13" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 74.4, "date": "2026-02-01" }, { "model_id": "tencent/Hy3-preview", "short_name": "Hy3-preview", "provider": "tencent", "score": 74.4, "date": "2026-04-13" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 74.0, "date": "2025-12-20" }, { "model_id": "inclusionAI/Ring-2.6-1T", "short_name": "Ring-2.6-1T", "provider": "inclusionAI", "score": 74.0, "date": "2026-05-14" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 73.8, "date": "2025-12-22" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 73.4, "date": "2026-04-15" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 72.4, "date": "2026-02-24" }, { "model_id": "inclusionAI/Ling-2.6-1T", "short_name": "Ling-2.6-1T", "provider": "inclusionAI", "score": 72.2, "date": "2026-04-29" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 72.0, "date": "2026-02-24" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "short_name": "NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "provider": "nvidia", "score": 71.9, "date": "2026-06-03" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 71.3, "date": "2025-11-04" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 70.8, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3-Coder-Next", "short_name": "Qwen3-Coder-Next", "provider": "Qwen", "score": 70.6, "date": "2026-01-30" }, { "model_id": "poolside/Laguna-XS.2", "short_name": "Laguna-XS.2", "provider": "poolside", "score": 69.9, "date": "2026-04-23" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4", "short_name": "NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4", "provider": "nvidia", "score": 69.7, "date": "2026-06-03" }, { "model_id": "MiniMaxAI/MiniMax-M2", "short_name": "MiniMax-M2", "provider": "MiniMaxAI", "score": 69.4, "date": "2025-10-22" }, { "model_id": "MuVeraAI/Laguna-XS.2", "short_name": "Laguna-XS.2", "provider": "MuVeraAI", "score": 68.2, "date": "2026-04-28" }, { "model_id": "internlm/Intern-S2-Preview", "short_name": "Intern-S2-Preview", "provider": "internlm", "score": 64.0, "date": "2026-05-15" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 62.4, "date": "2025-08-04" }, { "model_id": "inclusionAI/Ling-2.6-flash", "short_name": "Ling-2.6-flash", "provider": "inclusionAI", "score": 61.2, "date": "2026-04-28" }, { "model_id": "openai/gpt-oss-20b", "short_name": "gpt-oss-20b", "provider": "openai", "score": 60.7, "date": "2025-08-04" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 60.47, "date": "2026-03-10" }, { "model_id": "RedHatAI/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "RedHatAI", "score": 60.47, "date": "2026-03-26" }, { "model_id": "zai-org/GLM-4.7-Flash", "short_name": "GLM-4.7-Flash", "provider": "zai-org", "score": 59.2, "date": "2026-01-19" }, { "model_id": "facebook/cwm", "short_name": "cwm", "provider": "facebook", "score": 53.9, "date": "2025-08-25" } ] }, "WildClawBench": { "name": "WildClawBench", "dataset": "internlm/WildClawBench", "lower_is_better": false, "models": [ { "model_id": "nex-agi/Nex-N2-Pro", "short_name": "Nex-N2-Pro", "provider": "nex-agi", "score": 53.5, "date": "2026-06-03" }, { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 48.2, "date": "2026-04-03" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 43.7, "date": "2026-04-22" }, { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 43.0, "date": "2026-04-27" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 42.6, "date": "2026-02-11" }, { "model_id": "internlm/Intern-S2-Preview", "short_name": "Intern-S2-Preview", "provider": "internlm", "score": 39.2, "date": "2026-05-15" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 34.5, "date": "2026-02-16" }, { "model_id": "MiniMaxAI/MiniMax-M2.7", "short_name": "MiniMax-M2.7", "provider": "MiniMaxAI", "score": 33.8, "date": "2026-04-09" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 30.8, "date": "2026-01-01" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 27.1, "date": "2026-02-12" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 26.7, "date": "2026-02-01" } ] }, "ParseBench": { "name": "ParseBench", "dataset": "llamaindex/ParseBench", "lower_is_better": false, "models": [ { "model_id": "infly/Infinity-Parser2-Pro", "short_name": "Infinity-Parser2-Pro", "provider": "infly", "score": 74.3, "date": "2026-04-08" }, { "model_id": "infly/Infinity-Parser2-Flash", "short_name": "Infinity-Parser2-Flash", "provider": "infly", "score": 73.25, "date": "2026-02-27" }, { "model_id": "datalab-to/chandra-ocr-2", "short_name": "chandra-ocr-2", "provider": "datalab-to", "score": 70.1, "date": "2026-03-16" }, { "model_id": "datalab-to/surya-ocr-2", "short_name": "surya-ocr-2", "provider": "datalab-to", "score": 64.83, "date": "2026-05-14" }, { "model_id": "rednote-hilab/dots.mocr", "short_name": "dots.mocr", "provider": "rednote-hilab", "score": 55.8, "date": "2026-03-19" }, { "model_id": "tiiuae/Falcon-OCR", "short_name": "Falcon-OCR", "provider": "tiiuae", "score": 53.08, "date": "2026-02-22" }, { "model_id": "docling-project/docling-models", "short_name": "docling-models", "provider": "docling-project", "score": 50.6, "date": "2024-07-02" }, { "model_id": "lightonai/LightOnOCR-2-1B", "short_name": "LightOnOCR-2-1B", "provider": "lightonai", "score": 48.0, "date": "2026-01-16" }, { "model_id": "Qwen/Qwen3-VL-8B-Instruct", "short_name": "Qwen3-VL-8B-Instruct", "provider": "Qwen", "score": 46.8, "date": "2025-10-11" }, { "model_id": "baidu/Qianfan-OCR", "short_name": "Qianfan-OCR", "provider": "baidu", "score": 46.2, "date": "2026-03-18" }, { "model_id": "opendatalab/MinerU2.5-2509-1.2B", "short_name": "MinerU2.5-2509-1.2B", "provider": "opendatalab", "score": 45.9, "date": "2025-09-17" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 44.1, "date": "2026-04-15" }, { "model_id": "deepseek-ai/DeepSeek-OCR-2", "short_name": "DeepSeek-OCR-2", "provider": "deepseek-ai", "score": 41.2, "date": "2026-01-27" }, { "model_id": "ibm-granite/granite-vision-4.1-4b", "short_name": "granite-vision-4.1-4b", "provider": "ibm-granite", "score": 39.45, "date": "2026-04-16" }, { "model_id": "zai-org/GLM-OCR", "short_name": "GLM-OCR", "provider": "zai-org", "score": 29.6, "date": "2026-01-30" } ] }, "open_asr_leaderboard": { "name": "open-asr-leaderboard", "dataset": "hf-audio/open-asr-leaderboard", "lower_is_better": true, "models": [ { "model_id": "CohereLabs/cohere-transcribe-03-2026", "short_name": "cohere-transcribe-03-2026", "provider": "CohereLabs", "score": 5.42, "date": "2026-03-24" }, { "model_id": "Qwen/Qwen3-ASR-1.7B", "short_name": "Qwen3-ASR-1.7B", "provider": "Qwen", "score": 5.76, "date": "2026-01-28" }, { "model_id": "microsoft/Phi-4-multimodal-instruct", "short_name": "Phi-4-multimodal-instruct", "provider": "microsoft", "score": 6.02, "date": "2025-02-24" }, { "model_id": "nvidia/parakeet-tdt-0.6b-v2", "short_name": "parakeet-tdt-0.6b-v2", "provider": "nvidia", "score": 6.05, "date": "2025-04-15" }, { "model_id": "nvidia/parakeet-tdt-0.6b-v3", "short_name": "parakeet-tdt-0.6b-v3", "provider": "nvidia", "score": 6.32, "date": "2025-08-04" }, { "model_id": "nvidia/canary-1b-flash", "short_name": "canary-1b-flash", "provider": "nvidia", "score": 6.35, "date": "2025-03-07" }, { "model_id": "kyutai/stt-2.6b-en", "short_name": "stt-2.6b-en", "provider": "kyutai", "score": 6.4, "date": "2025-06-06" }, { "model_id": "Qwen/Qwen3-ASR-0.6B", "short_name": "Qwen3-ASR-0.6B", "provider": "Qwen", "score": 6.42, "date": "2026-01-28" }, { "model_id": "nvidia/canary-1b", "short_name": "canary-1b", "provider": "nvidia", "score": 6.5, "date": "2024-02-07" }, { "model_id": "UsefulSensors/moonshine-streaming-medium", "short_name": "moonshine-streaming-medium", "provider": "UsefulSensors", "score": 6.66, "date": "2026-01-06" }, { "model_id": "soundsgoodai/Zipformer-transducer-XL-290M", "short_name": "Zipformer-transducer-XL-290M", "provider": "soundsgoodai", "score": 6.79, "date": "2026-05-12" }, { "model_id": "nvidia/parakeet-tdt-1.1b", "short_name": "parakeet-tdt-1.1b", "provider": "nvidia", "score": 7.01, "date": "2024-01-25" }, { "model_id": "zai-org/GLM-ASR-Nano-2512", "short_name": "GLM-ASR-Nano-2512", "provider": "zai-org", "score": 7.03, "date": "2025-12-09" }, { "model_id": "mistralai/Voxtral-Mini-3B-2507", "short_name": "Voxtral-Mini-3B-2507", "provider": "mistralai", "score": 7.05, "date": "2025-07-01" }, { "model_id": "nvidia/canary-180m-flash", "short_name": "canary-180m-flash", "provider": "nvidia", "score": 7.12, "date": "2025-03-11" }, { "model_id": "nvidia/parakeet-rnnt-1.1b", "short_name": "parakeet-rnnt-1.1b", "provider": "nvidia", "score": 7.12, "date": "2023-12-27" }, { "model_id": "nvidia/canary-1b-v2", "short_name": "canary-1b-v2", "provider": "nvidia", "score": 7.15, "date": "2025-08-04" }, { "model_id": "distil-whisper/distil-large-v3.5", "short_name": "distil-large-v3.5", "provider": "distil-whisper", "score": 7.21, "date": "2024-12-05" }, { "model_id": "nvidia/parakeet-ctc-1.1b", "short_name": "parakeet-ctc-1.1b", "provider": "nvidia", "score": 7.4, "date": "2023-12-28" }, { "model_id": "espnet/owsm_ctc_v4_1B", "short_name": "owsm_ctc_v4_1B", "provider": "espnet", "score": 7.42, "date": "2025-01-16" }, { "model_id": "openai/whisper-large-v3", "short_name": "whisper-large-v3", "provider": "openai", "score": 7.44, "date": "2023-11-07" }, { "model_id": "nvidia/parakeet-tdt_ctc-110m", "short_name": "parakeet-tdt_ctc-110m", "provider": "nvidia", "score": 7.49, "date": "2024-09-17" }, { "model_id": "nvidia/parakeet-rnnt-0.6b", "short_name": "parakeet-rnnt-0.6b", "provider": "nvidia", "score": 7.5, "date": "2023-12-28" }, { "model_id": "distil-whisper/distil-large-v3", "short_name": "distil-large-v3", "provider": "distil-whisper", "score": 7.52, "date": "2024-03-21" }, { "model_id": "nvidia/parakeet-ctc-0.6b", "short_name": "parakeet-ctc-0.6b", "provider": "nvidia", "score": 7.69, "date": "2023-12-28" }, { "model_id": "microsoft/VibeVoice-ASR-HF", "short_name": "VibeVoice-ASR-HF", "provider": "microsoft", "score": 7.77, "date": "2026-03-02" }, { "model_id": "openai/whisper-large-v2", "short_name": "whisper-large-v2", "provider": "openai", "score": 7.83, "date": "2022-12-05" }, { "model_id": "UsefulSensors/moonshine-streaming-small", "short_name": "moonshine-streaming-small", "provider": "UsefulSensors", "score": 7.84, "date": "2026-01-06" }, { "model_id": "distil-whisper/distil-large-v2", "short_name": "distil-large-v2", "provider": "distil-whisper", "score": 7.92, "date": "2023-10-24" }, { "model_id": "openai/whisper-large", "short_name": "whisper-large", "provider": "openai", "score": 7.94, "date": "2022-09-26" }, { "model_id": "openai/whisper-medium.en", "short_name": "whisper-medium.en", "provider": "openai", "score": 8.09, "date": "2022-09-26" }, { "model_id": "espnet/owsm_ctc_v3.1_1B", "short_name": "owsm_ctc_v3.1_1B", "provider": "espnet", "score": 8.12, "date": "2024-02-23" }, { "model_id": "nvidia/stt_en_conformer_ctc_large", "short_name": "stt_en_conformer_ctc_large", "provider": "nvidia", "score": 8.32, "date": "2022-04-09" }, { "model_id": "speechbrain/asr-conformer-loquacious", "short_name": "asr-conformer-loquacious", "provider": "speechbrain", "score": 8.48, "date": "2025-02-06" }, { "model_id": "distil-whisper/distil-small.en", "short_name": "distil-small.en", "provider": "distil-whisper", "score": 8.57, "date": "2023-12-06" }, { "model_id": "openai/whisper-small.en", "short_name": "whisper-small.en", "provider": "openai", "score": 8.59, "date": "2022-09-26" }, { "model_id": "distil-whisper/distil-medium.en", "short_name": "distil-medium.en", "provider": "distil-whisper", "score": 8.77, "date": "2023-10-24" }, { "model_id": "abr-ai/niagara-38m-batch.en", "short_name": "niagara-38m-batch.en", "provider": "abr-ai", "score": 8.91, "date": "2026-02-19" }, { "model_id": "nvidia/stt_en_fastconformer_ctc_large", "short_name": "stt_en_fastconformer_ctc_large", "provider": "nvidia", "score": 8.96, "date": "2023-06-08" }, { "model_id": "nvidia/stt_en_fastconformer_transducer_large", "short_name": "stt_en_fastconformer_transducer_large", "provider": "nvidia", "score": 9.06, "date": "2023-06-08" }, { "model_id": "UsefulSensors/moonshine-base", "short_name": "moonshine-base", "provider": "UsefulSensors", "score": 9.99, "date": "2024-11-02" }, { "model_id": "openai/whisper-base.en", "short_name": "whisper-base.en", "provider": "openai", "score": 10.32, "date": "2022-09-26" }, { "model_id": "abr-ai/niagara-19m-batch.en", "short_name": "niagara-19m-batch.en", "provider": "abr-ai", "score": 10.47, "date": "2025-11-13" }, { "model_id": "nvidia/stt_en_conformer_ctc_small", "short_name": "stt_en_conformer_ctc_small", "provider": "nvidia", "score": 11.16, "date": "2023-06-12" }, { "model_id": "UsefulSensors/moonshine-streaming-tiny", "short_name": "moonshine-streaming-tiny", "provider": "UsefulSensors", "score": 12.0, "date": "2026-01-06" }, { "model_id": "UsefulSensors/moonshine-tiny", "short_name": "moonshine-tiny", "provider": "UsefulSensors", "score": 12.65, "date": "2024-10-30" }, { "model_id": "openai/whisper-tiny.en", "short_name": "whisper-tiny.en", "provider": "openai", "score": 12.81, "date": "2022-09-26" }, { "model_id": "speechbrain/asr-wav2vec2-librispeech", "short_name": "asr-wav2vec2-librispeech", "provider": "speechbrain", "score": 14.35, "date": "2022-06-05" }, { "model_id": "facebook/wav2vec2-large-960h-lv60-self", "short_name": "wav2vec2-large-960h-lv60-self", "provider": "facebook", "score": 21.27, "date": "2022-03-02" }, { "model_id": "facebook/mms-1b-all", "short_name": "mms-1b-all", "provider": "facebook", "score": 22.54, "date": "2023-05-27" }, { "model_id": "facebook/hubert-xlarge-ls960-ft", "short_name": "hubert-xlarge-ls960-ft", "provider": "facebook", "score": 22.55, "date": "2022-03-02" }, { "model_id": "facebook/hubert-large-ls960-ft", "short_name": "hubert-large-ls960-ft", "provider": "facebook", "score": 22.69, "date": "2022-03-02" }, { "model_id": "facebook/wav2vec2-large-robust-ft-libri-960h", "short_name": "wav2vec2-large-robust-ft-libri-960h", "provider": "facebook", "score": 22.93, "date": "2022-03-02" }, { "model_id": "facebook/data2vec-audio-large-960h", "short_name": "data2vec-audio-large-960h", "provider": "facebook", "score": 23.21, "date": "2022-04-02" }, { "model_id": "facebook/wav2vec2-conformer-rope-large-960h-ft", "short_name": "wav2vec2-conformer-rope-large-960h-ft", "provider": "facebook", "score": 23.28, "date": "2022-04-18" }, { "model_id": "facebook/wav2vec2-conformer-rel-pos-large-960h-ft", "short_name": "wav2vec2-conformer-rel-pos-large-960h-ft", "provider": "facebook", "score": 23.29, "date": "2022-04-18" }, { "model_id": "facebook/wav2vec2-large-960h", "short_name": "wav2vec2-large-960h", "provider": "facebook", "score": 26.77, "date": "2022-03-02" }, { "model_id": "facebook/data2vec-audio-base-960h", "short_name": "data2vec-audio-base-960h", "provider": "facebook", "score": 28.3, "date": "2022-03-02" }, { "model_id": "facebook/wav2vec2-base-960h", "short_name": "wav2vec2-base-960h", "provider": "facebook", "score": 29.4, "date": "2022-03-02" }, { "model_id": "facebook/mms-1b-fl102", "short_name": "mms-1b-fl102", "provider": "facebook", "score": 39.8, "date": "2023-05-27" } ] }, "LEXam": { "name": "LEXam", "dataset": "LEXam-Benchmark/LEXam", "lower_is_better": false, "models": [ { "model_id": "deepseek-ai/DeepSeek-R1", "short_name": "DeepSeek-R1", "provider": "deepseek-ai", "score": 52.41, "date": "2025-01-20" }, { "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "short_name": "Qwen3-235B-A22B-Thinking-2507", "provider": "Qwen", "score": 48.19, "date": "2025-07-25" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 47.71, "date": "2025-08-04" }, { "model_id": "deepseek-ai/DeepSeek-V3", "short_name": "DeepSeek-V3", "provider": "deepseek-ai", "score": 46.57, "date": "2024-12-25" }, { "model_id": "Qwen/Qwen3-32B", "short_name": "Qwen3-32B", "provider": "Qwen", "score": 45.3, "date": "2025-04-27" }, { "model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "short_name": "Qwen3-Next-80B-A3B-Thinking", "provider": "Qwen", "score": 43.31, "date": "2025-09-09" }, { "model_id": "openai/gpt-oss-20b", "short_name": "gpt-oss-20b", "provider": "openai", "score": 40.78, "date": "2025-08-04" }, { "model_id": "microsoft/phi-4", "short_name": "phi-4", "provider": "microsoft", "score": 40.66, "date": "2024-12-11" }, { "model_id": "mistralai/Ministral-8B-Instruct-2410", "short_name": "Ministral-8B-Instruct-2410", "provider": "mistralai", "score": 26.27, "date": "2024-10-15" } ] }, "swePro": { "name": "SWE-bench Pro", "dataset": "ScaleAI/SWE-bench_Pro", "lower_is_better": false, "models": [ { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 58.6, "date": "2026-04-14" }, { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 58.4, "date": "2026-04-03" }, { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 57.2, "date": "2026-04-27" }, { "model_id": "stepfun-ai/Step-3.7-Flash", "short_name": "Step-3.7-Flash", "provider": "stepfun-ai", "score": 56.3, "date": "2026-05-23" }, { "model_id": "MiniMaxAI/MiniMax-M2.7", "short_name": "MiniMax-M2.7", "provider": "MiniMaxAI", "score": 56.2, "date": "2026-04-09" }, { "model_id": "XiaomiMiMo/MiMo-V2.5", "short_name": "MiMo-V2.5", "provider": "XiaomiMiMo", "score": 56.1, "date": "2026-04-27" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 55.4, "date": "2026-02-12" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 55.4, "date": "2026-04-22" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 53.5, "date": "2026-04-21" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 50.7, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 49.5, "date": "2026-04-15" }, { "model_id": "poolside/Laguna-XS.2", "short_name": "Laguna-XS.2", "provider": "poolside", "score": 46.3, "date": "2026-04-23" }, { "model_id": "MuVeraAI/Laguna-XS.2", "short_name": "Laguna-XS.2", "provider": "MuVeraAI", "score": 44.5, "date": "2026-04-28" }, { "model_id": "Qwen/Qwen3-Coder-Next", "short_name": "Qwen3-Coder-Next", "provider": "Qwen", "score": 44.3, "date": "2026-01-30" }, { "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "short_name": "Qwen3-Coder-480B-A35B-Instruct", "provider": "Qwen", "score": 38.7, "date": "2025-07-22" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 36.81, "date": "2025-12-20" }, { "model_id": "moonshotai/Kimi-K2-Instruct", "short_name": "Kimi-K2-Instruct", "provider": "moonshotai", "score": 27.67, "date": "2025-07-11" }, { "model_id": "Qwen/Qwen3-235B-A22B", "short_name": "Qwen3-235B-A22B", "provider": "Qwen", "score": 21.41, "date": "2025-04-27" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 16.2, "date": "2025-08-04" }, { "model_id": "zai-org/GLM-4.6", "short_name": "GLM-4.6", "provider": "zai-org", "score": 9.67, "date": "2025-09-29" } ] }, "apex_agents": { "name": "apex-agents", "dataset": "mercor/apex-agents", "lower_is_better": false, "models": [ { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 17.2, "date": "2026-02-11" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 14.4, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 13.6, "date": "2026-02-16" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 6.2, "date": "2026-02-12" }, { "model_id": "moonshotai/Kimi-K2-Instruct", "short_name": "Kimi-K2-Instruct", "provider": "moonshotai", "score": 4.1, "date": "2025-07-11" }, { "model_id": "zai-org/GLM-4.6", "short_name": "GLM-4.6", "provider": "zai-org", "score": 4.0, "date": "2025-09-29" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 3.1, "date": "2025-12-22" } ] }, "Claw_Eval": { "name": "Claw-Eval", "dataset": "claw-eval/Claw-Eval", "lower_is_better": false, "models": [ { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 64.0, "date": "2026-04-27" }, { "model_id": "inclusionAI/Ring-2.6-1T", "short_name": "Ring-2.6-1T", "provider": "inclusionAI", "score": 63.82, "date": "2026-05-14" }, { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 62.7, "date": "2026-04-03" }, { "model_id": "XiaomiMiMo/MiMo-V2.5", "short_name": "MiMo-V2.5", "provider": "XiaomiMiMo", "score": 62.1, "date": "2026-04-27" }, { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 61.5, "date": "2026-04-14" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 58.4, "date": "2026-04-22" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 57.8, "date": "2026-02-16" }, { "model_id": "deepseek-ai/DeepSeek-V4-Flash", "short_name": "DeepSeek-V4-Flash", "provider": "deepseek-ai", "score": 57.8, "date": "2026-04-22" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 52.8, "date": "2026-01-01" }, { "model_id": "MiniMaxAI/MiniMax-M2.7", "short_name": "MiniMax-M2.7", "provider": "MiniMaxAI", "score": 49.7, "date": "2026-04-09" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 6.8, "date": "2026-03-10" } ] }, "ResearchClawBench": { "name": "ResearchClawBench", "dataset": "InternScience/ResearchClawBench", "lower_is_better": false, "models": [ { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 18.19, "date": "2026-04-03" }, { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 18.0, "date": "2026-04-14" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 17.12, "date": "2026-04-22" }, { "model_id": "XiaomiMiMo/MiMo-V2.5", "short_name": "MiMo-V2.5", "provider": "XiaomiMiMo", "score": 16.91, "date": "2026-04-27" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 14.23, "date": "2026-02-16" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 13.96, "date": "2026-01-01" } ] }, "Video_MME_v2": { "name": "Video-MME-v2", "dataset": "MME-Benchmarks/Video-MME-v2", "lower_is_better": false, "models": [ { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 61.1, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 55.9, "date": "2026-02-16" } ] }, "arguana": { "name": "arguana", "dataset": "mteb/arguana", "lower_is_better": false, "models": [ { "model_id": "google/embeddinggemma-300m", "short_name": "embeddinggemma-300m", "provider": "google", "score": 71.53, "date": "2025-07-17" }, { "model_id": "Snowflake/snowflake-arctic-embed-l-v2.0", "short_name": "snowflake-arctic-embed-l-v2.0", "provider": "Snowflake", "score": 59.11, "date": "2024-11-08" }, { "model_id": "ibm-granite/granite-embedding-125m-english", "short_name": "granite-embedding-125m-english", "provider": "ibm-granite", "score": 58.4, "date": "2024-12-04" }, { "model_id": "Snowflake/snowflake-arctic-embed-m-v2.0", "short_name": "snowflake-arctic-embed-m-v2.0", "provider": "Snowflake", "score": 57.88, "date": "2024-11-08" }, { "model_id": "BAAI/bge-m3", "short_name": "bge-m3", "provider": "BAAI", "score": 54.04, "date": "2024-01-27" }, { "model_id": "nomic-ai/nomic-embed-text-v1.5", "short_name": "nomic-embed-text-v1.5", "provider": "nomic-ai", "score": 52.02, "date": "2024-02-10" }, { "model_id": "mteb/baseline-bm25s", "short_name": "baseline-bm25s", "provider": "mteb", "score": 49.28, "date": "2026-02-19" }, { "model_id": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", "short_name": "paraphrase-multilingual-mpnet-base-v2", "provider": "sentence-transformers", "score": 48.91, "date": "2022-03-02" }, { "model_id": "jinaai/jina-embeddings-v3", "short_name": "jina-embeddings-v3", "provider": "jinaai", "score": 43.29, "date": "2024-09-05" }, { "model_id": "novelcore/cosmos-small", "short_name": "cosmos-small", "provider": "novelcore", "score": 39.09, "date": "2026-05-27" }, { "model_id": "sentence-transformers/LaBSE", "short_name": "LaBSE", "provider": "sentence-transformers", "score": 34.18, "date": "2022-03-02" } ] }, "ScreenSpot_Pro": { "name": "SS-Pro", "dataset": "likaixin/ScreenSpot-Pro", "lower_is_better": false, "models": [ { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 70.4, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 70.3, "date": "2026-02-24" }, { "model_id": "inclusionAI/UI-Venus-1.5-30B-A3B", "short_name": "UI-Venus-1.5-30B-A3B", "provider": "inclusionAI", "score": 69.6, "date": "2026-02-09" }, { "model_id": "inclusionAI/UI-Venus-1.5-8B", "short_name": "UI-Venus-1.5-8B", "provider": "inclusionAI", "score": 68.4, "date": "2026-02-09" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 65.6, "date": "2026-02-16" }, { "model_id": "Salesforce/GTA1-32B", "short_name": "GTA1-32B", "provider": "Salesforce", "score": 63.6, "date": "2025-09-25" }, { "model_id": "inclusionAI/UI-Venus-Ground-72B", "short_name": "UI-Venus-Ground-72B", "provider": "inclusionAI", "score": 61.9, "date": "2025-08-16" }, { "model_id": "ByteDance-Seed/UI-TARS-1.5-7B", "short_name": "UI-TARS-1.5-7B", "provider": "ByteDance-Seed", "score": 61.6, "date": "2025-04-16" }, { "model_id": "inclusionAI/UI-Venus-1.5-2B", "short_name": "UI-Venus-1.5-2B", "provider": "inclusionAI", "score": 57.7, "date": "2026-02-09" }, { "model_id": "Salesforce/GTA1-7B", "short_name": "GTA1-7B", "provider": "Salesforce", "score": 55.5, "date": "2025-10-01" }, { "model_id": "inclusionAI/UI-Venus-Ground-7B", "short_name": "UI-Venus-Ground-7B", "provider": "inclusionAI", "score": 50.8, "date": "2025-08-15" }, { "model_id": "Qwen/Qwen2.5-VL-32B-Instruct", "short_name": "Qwen2.5-VL-32B-Instruct", "provider": "Qwen", "score": 48.0, "date": "2025-03-21" }, { "model_id": "KDEGroup/UI-AGILE-3B", "short_name": "UI-AGILE-3B", "provider": "KDEGroup", "score": 45.0, "date": "2025-08-07" }, { "model_id": "ByteDance-Seed/UI-TARS-72B-SFT", "short_name": "UI-TARS-72B-SFT", "provider": "ByteDance-Seed", "score": 38.1, "date": "2025-01-20" }, { "model_id": "ByteDance-Seed/UI-TARS-7B-SFT", "short_name": "UI-TARS-7B-SFT", "provider": "ByteDance-Seed", "score": 35.7, "date": "2025-01-20" }, { "model_id": "ByteDance-Seed/UI-TARS-2B-SFT", "short_name": "UI-TARS-2B-SFT", "provider": "ByteDance-Seed", "score": 27.7, "date": "2025-01-20" }, { "model_id": "Qwen/Qwen2.5-VL-7B-Instruct", "short_name": "Qwen2.5-VL-7B-Instruct", "provider": "Qwen", "score": 26.8, "date": "2025-01-26" }, { "model_id": "Qwen/Qwen2.5-VL-3B-Instruct", "short_name": "Qwen2.5-VL-3B-Instruct", "provider": "Qwen", "score": 16.1, "date": "2025-01-26" }, { "model_id": "zai-org/CogAgent", "short_name": "CogAgent", "provider": "zai-org", "score": 7.7, "date": "2023-12-15" }, { "model_id": "openbmb/MiniCPM-V-2", "short_name": "MiniCPM-V-2", "provider": "openbmb", "score": 3.0, "date": "2024-04-09" }, { "model_id": "Qwen/Qwen-VL", "short_name": "Qwen-VL", "provider": "Qwen", "score": 0.1, "date": "2023-08-18" } ] }, "APEX_v1_extended": { "name": "APEX-v1-extended", "dataset": "mercor/APEX-v1-extended", "lower_is_better": false, "models": [ { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 51.7, "date": "2025-12-22" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 49.0, "date": "2026-02-11" } ] }, "vlabench_primitive_ft_lerobot_video": { "name": "vlabench_primitive_ft_lerobot_video", "dataset": "VLABench/vlabench_primitive_ft_lerobot_video", "lower_is_better": false, "models": [ { "model_id": "lerobot/pi0_base", "short_name": "pi0_base", "provider": "lerobot", "score": 44.5, "date": "2025-09-09" }, { "model_id": "lerobot/pi05_base", "short_name": "pi05_base", "provider": "lerobot", "score": 42.0, "date": "2025-09-09" }, { "model_id": "nvidia/GR00T-N1-2B", "short_name": "GR00T-N1-2B", "provider": "nvidia", "score": 39.7, "date": "2025-03-05" }, { "model_id": "lerobot/pi0fast-base", "short_name": "pi0fast-base", "provider": "lerobot", "score": 34.1, "date": "2026-01-09" } ] }, "evasionBench": { "name": "EvasionBench", "dataset": "FutureMa/EvasionBench", "lower_is_better": false, "models": [ { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 82.91, "date": "2025-12-22" }, { "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "short_name": "Qwen3-Coder-480B-A35B-Instruct", "provider": "Qwen", "score": 78.16, "date": "2025-07-22" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 71.31, "date": "2025-12-20" }, { "model_id": "moonshotai/Kimi-K2-Instruct-0905", "short_name": "Kimi-K2-Instruct-0905", "provider": "moonshotai", "score": 66.68, "date": "2025-09-03" } ] }, "aime2026": { "name": "AIME 2026", "dataset": "MathArena/aime_2026", "lower_is_better": false, "models": [ { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 96.67, "date": "2026-02-01" }, { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 96.4, "date": "2026-04-14" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 95.83, "date": "2026-01-01" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 95.83, "date": "2026-02-11" }, { "model_id": "inclusionAI/Ring-2.6-1T", "short_name": "Ring-2.6-1T", "provider": "inclusionAI", "score": 95.83, "date": "2026-05-14" }, { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 95.3, "date": "2026-04-03" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 94.1, "date": "2026-04-21" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 93.33, "date": "2026-02-16" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 92.7, "date": "2026-04-15" }, { "model_id": "LGAI-EXAONE/EXAONE-4.5-33B", "short_name": "EXAONE-4.5-33B", "provider": "LGAI-EXAONE", "score": 92.6, "date": "2026-04-04" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 90.83, "date": "2026-02-24" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 90.0, "date": "2026-03-10" }, { "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "short_name": "Qwen3-30B-A3B-Thinking-2507", "provider": "Qwen", "score": 87.5, "date": "2025-07-29" }, { "model_id": "Qwen/Qwen3-4B-Thinking-2507", "short_name": "Qwen3-4B-Thinking-2507", "provider": "Qwen", "score": 82.5, "date": "2025-08-05" }, { "model_id": "inclusionAI/Ling-2.6-flash", "short_name": "Ling-2.6-flash", "provider": "inclusionAI", "score": 73.85, "date": "2026-04-28" } ] }, "terminalBench": { "name": "Terminal-Bench 2.0", "dataset": "harborframework/terminal-bench-2.0", "lower_is_better": false, "models": [ { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 69.0, "date": "2026-04-03" }, { "model_id": "XiaomiMiMo/MiMo-V2.5-Pro", "short_name": "MiMo-V2.5-Pro", "provider": "XiaomiMiMo", "score": 68.4, "date": "2026-04-27" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 67.9, "date": "2026-04-22" }, { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 66.7, "date": "2026-04-14" }, { "model_id": "XiaomiMiMo/MiMo-V2.5", "short_name": "MiMo-V2.5", "provider": "XiaomiMiMo", "score": 65.8, "date": "2026-04-27" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 59.3, "date": "2026-04-21" }, { "model_id": "MiniMaxAI/MiniMax-M2.7", "short_name": "MiniMax-M2.7", "provider": "MiniMaxAI", "score": 57.0, "date": "2026-04-09" }, { "model_id": "deepseek-ai/DeepSeek-V4-Flash", "short_name": "DeepSeek-V4-Flash", "provider": "deepseek-ai", "score": 56.9, "date": "2026-04-22" }, { "model_id": "tencent/Hy3-preview", "short_name": "Hy3-preview", "provider": "tencent", "score": 54.4, "date": "2026-04-13" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 52.5, "date": "2026-02-16" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 52.4, "date": "2026-02-11" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 51.5, "date": "2026-04-15" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 51.0, "date": "2026-02-01" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 49.4, "date": "2026-02-24" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 43.2, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 41.6, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3-Coder-Next", "short_name": "Qwen3-Coder-Next", "provider": "Qwen", "score": 36.2, "date": "2026-01-30" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 35.7, "date": "2025-11-04" }, { "model_id": "poolside/Laguna-XS.2", "short_name": "Laguna-XS.2", "provider": "poolside", "score": 35.7, "date": "2026-04-23" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 33.4, "date": "2025-12-22" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 31.0, "date": "2026-03-10" }, { "model_id": "RedHatAI/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "RedHatAI", "score": 31.0, "date": "2026-03-26" }, { "model_id": "MuVeraAI/Laguna-XS.2", "short_name": "Laguna-XS.2", "provider": "MuVeraAI", "score": 30.1, "date": "2026-04-28" }, { "model_id": "MiniMaxAI/MiniMax-M2", "short_name": "MiniMax-M2", "provider": "MiniMaxAI", "score": 30.0, "date": "2025-10-22" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 29.2, "date": "2025-12-20" }, { "model_id": "moonshotai/Kimi-K2-Instruct", "short_name": "Kimi-K2-Instruct", "provider": "moonshotai", "score": 27.8, "date": "2025-07-11" }, { "model_id": "nvidia/Nemotron-Terminal-32B", "short_name": "Nemotron-Terminal-32B", "provider": "nvidia", "score": 27.4, "date": "2026-02-17" }, { "model_id": "zai-org/GLM-4.6", "short_name": "GLM-4.6", "provider": "zai-org", "score": 24.5, "date": "2025-09-29" }, { "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "short_name": "Qwen3-Coder-480B-A35B-Instruct", "provider": "Qwen", "score": 23.9, "date": "2025-07-22" }, { "model_id": "nvidia/Nemotron-Terminal-14B", "short_name": "Nemotron-Terminal-14B", "provider": "nvidia", "score": 20.2, "date": "2026-02-17" }, { "model_id": "nvidia/Nemotron-Terminal-8B", "short_name": "Nemotron-Terminal-8B", "provider": "nvidia", "score": 13.0, "date": "2026-02-17" } ] }, "hmmt2026": { "name": "HMMT Feb 2026", "dataset": "MathArena/hmmt_feb_2026", "lower_is_better": false, "models": [ { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 92.7, "date": "2026-04-14" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 87.88, "date": "2026-02-16" }, { "model_id": "internlm/Intern-S2-Preview", "short_name": "Intern-S2-Preview", "provider": "internlm", "score": 87.31, "date": "2026-05-15" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 87.12, "date": "2026-01-01" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 86.36, "date": "2026-02-01" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 86.36, "date": "2026-02-11" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 84.85, "date": "2026-03-10" }, { "model_id": "Qwen/Qwen3.6-27B", "short_name": "Qwen3.6-27B", "provider": "Qwen", "score": 84.3, "date": "2026-04-21" }, { "model_id": "Qwen/Qwen3.6-35B-A3B", "short_name": "Qwen3.6-35B-A3B", "provider": "Qwen", "score": 83.6, "date": "2026-04-15" }, { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 82.6, "date": "2026-04-03" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 81.06, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "short_name": "Qwen3-30B-A3B-Thinking-2507", "provider": "Qwen", "score": 78.79, "date": "2025-07-29" }, { "model_id": "Qwen/Qwen3-4B-Thinking-2507", "short_name": "Qwen3-4B-Thinking-2507", "provider": "Qwen", "score": 53.03, "date": "2025-08-05" }, { "model_id": "inclusionAI/Ling-2.6-flash", "short_name": "Ling-2.6-flash", "provider": "inclusionAI", "score": 49.29, "date": "2026-04-28" } ] }, "yc_bench": { "name": "YC-Bench", "dataset": "collinear-ai/yc-bench", "lower_is_better": false, "models": [ { "model_id": "zai-org/GLM-5.1", "short_name": "GLM-5.1", "provider": "zai-org", "score": 1510772.0, "date": "2026-04-03" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 1208190.0, "date": "2026-02-11" }, { "model_id": "deepseek-ai/DeepSeek-V4-Pro", "short_name": "DeepSeek-V4-Pro", "provider": "deepseek-ai", "score": 1066426.0, "date": "2026-04-22" }, { "model_id": "moonshotai/Kimi-K2.6", "short_name": "Kimi-K2.6", "provider": "moonshotai", "score": 511137.0, "date": "2026-04-14" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 408822.0, "date": "2026-01-01" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 398410.0, "date": "2025-12-22" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 230465.0, "date": "2026-02-12" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 90787.0, "date": "2026-02-16" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 0.0, "date": "2026-02-24" } ] }, "MDPBench": { "name": "MDPBench", "dataset": "Delores-Lin/MDPBench", "lower_is_better": false, "models": [ { "model_id": "rednote-hilab/dots.mocr", "short_name": "dots.mocr", "provider": "rednote-hilab", "score": 80.5, "date": "2026-03-19" }, { "model_id": "datalab-to/chandra-ocr-2", "short_name": "chandra-ocr-2", "provider": "datalab-to", "score": 79.7, "date": "2026-03-16" }, { "model_id": "rednote-hilab/dots.ocr", "short_name": "dots.ocr", "provider": "rednote-hilab", "score": 76.5, "date": "2025-07-30" }, { "model_id": "Qwen/Qwen3-VL-8B-Instruct", "short_name": "Qwen3-VL-8B-Instruct", "provider": "Qwen", "score": 68.3, "date": "2025-10-11" }, { "model_id": "zai-org/GLM-OCR", "short_name": "GLM-OCR", "provider": "zai-org", "score": 67.3, "date": "2026-01-30" }, { "model_id": "lightonai/LightOnOCR-2-1B", "short_name": "LightOnOCR-2-1B", "provider": "lightonai", "score": 63.9, "date": "2026-01-16" }, { "model_id": "tiiuae/Falcon-OCR", "short_name": "Falcon-OCR", "provider": "tiiuae", "score": 56.3, "date": "2026-02-22" }, { "model_id": "deepseek-ai/DeepSeek-OCR", "short_name": "DeepSeek-OCR", "provider": "deepseek-ai", "score": 51.8, "date": "2025-10-17" }, { "model_id": "opendatalab/MinerU2.5-2509-1.2B", "short_name": "MinerU2.5-2509-1.2B", "provider": "opendatalab", "score": 46.3, "date": "2025-09-17" } ] } }, "logos": { "XiaomiMiMo": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg", "Snowflake": "https://cdn-avatars.huggingface.co/v1/production/uploads/64dc52cf858f8a41c12fc819/O9-MWzRjWzbNP_DQlMb-7.png", "espnet": "https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png", "KDEGroup": "https://cdn-avatars.huggingface.co/v1/production/uploads/688dc279b2d5ebe029e8aafe/GZis_Qxofgb67RkJsllJ6.png", "meituan-longcat": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png", "lerobot": "https://cdn-avatars.huggingface.co/v1/production/uploads/631ce4b244503b72277fc89f/pcLUTLsvMQiR-ujlTgLYF.png", "opendatalab": "https://cdn-avatars.huggingface.co/v1/production/uploads/639c3afa7432f2f5d16b7296/yqxxBknyeqkGnYsjoaR4M.png", "sentence-transformers": "https://cdn-avatars.huggingface.co/v1/production/uploads/1609621322398-5eff4688ff69163f6f59e66c.png", "mistral-community": "https://cdn-avatars.huggingface.co/v1/production/uploads/6141a88b3a0ec78603c9e784/HAdAzcs1CDB9OxgxT2W9K.png", "UsefulSensors": "https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/TXG6u2PtGnohUXBQwj2Ks.png", "JGOS-Model": "https://cdn-avatars.huggingface.co/v1/production/uploads/66e54edddba1e4fee4500a5a/mHGK5-lBrEy1xKZmRqtfc.png", "meta-llama": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png", "Lightricks": "https://cdn-avatars.huggingface.co/v1/production/uploads/1652783139615-628375426db5127097cf5442.png", "nvidia": "https://cdn-avatars.huggingface.co/v1/production/uploads/65df9200dc3292a8983e5017/Vs5FPVCH-VZBipV3qKTuy.png", "datalab-to": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ab6afe315e622f597bf9e8/YOgg0gVYVXZC1PDIHFTWK.png", "distil-whisper": "https://cdn-avatars.huggingface.co/v1/production/uploads/61f91cf54a8e5a275b2b3e7c/cUNzV7MAYi8lo9LsCYixp.png", "poolside": "https://cdn-avatars.huggingface.co/v1/production/uploads/699484cbe85a4b61cbc5ee0f/GpYWuz-CovEFgbPOW21dZ.png", "JetBrains": "https://cdn-avatars.huggingface.co/v1/production/uploads/6645d54b780d46f274dd4145/8RHS3MzGGBFWtH1-bynJ0.png", "PolarSeeker": "https://www.gravatar.com/avatar/55d9cc59db4e30206a307c186cc6d5bb?d=retro&size=100", "HuggingFaceTB": "https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/e4VK7uW5sTeCYupD0s_ob.png", "abacusai": "https://cdn-avatars.huggingface.co/v1/production/uploads/63128dd099791aa61d180c72/dJ6uR23m09M-YEafBpmea.png", "RedHatAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/60466e4b4f40b01b66151416/cdABRow21BL0sl1vSVTPk.png", "google": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png", "speechbrain": "https://cdn-avatars.huggingface.co/v1/production/uploads/1663000279893-60243f18c1f3c79f98e4b382.png", "mteb": "https://cdn-avatars.huggingface.co/v1/production/uploads/5ff5943752c26e9bc240bada/OrZxdlg8doDNO2TZ6Q58G.png", "Salesforce": "https://cdn-avatars.huggingface.co/v1/production/uploads/1602756670970-noauth.jpeg", "openbmb": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png", "AIDC-AI": "https://cdn-avatars.huggingface.co/v1/production/uploads/666a9d46a638e57bb7907929/CRc-9MCuH2q9hjTScyTPE.png", "nex-agi": "https://cdn-avatars.huggingface.co/v1/production/uploads/65435cad429b80b14922ab8d/a_O9jT_daz_NXTfxtcw6S.png", "zai-org": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png", "novelcore": "https://cdn-avatars.huggingface.co/v1/production/uploads/684012f0596dd502f7b54d98/Kf-JqKuXw7T-erLVhi_gS.png", "Multilingual-Multimodal-NLP": "https://www.gravatar.com/avatar/cdabcf4c0ac6a92af4940fe0eb6924eb?d=retro&size=100", "deepseek-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png", "EleutherAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/1614054059123-603481bb60e3dd96631c9095.png", "inclusionAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/662e1f9da266499277937d33/fyKuazRifqiaIO34xrhhm.jpeg", "MuVeraAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/65ba8b772db90cb31145dbee/SLiEhXDF9tuzu5GswaqgR.png", "jinaai": "https://cdn-avatars.huggingface.co/v1/production/uploads/603763514de52ff951d89793/wD54VbAHHyHop3uYlJKl4.png", "FINAL-Bench": "https://cdn-avatars.huggingface.co/v1/production/uploads/6905bc786cb49b1f11d32728/VZmuKH-liifeL2GCXlwka.jpeg", "LGAI-EXAONE": "https://cdn-avatars.huggingface.co/v1/production/uploads/66a899a72f11aaf66001a8dc/UfdrP3GMo9pNT62BaMnhw.png", "GSAI-ML": "https://cdn-avatars.huggingface.co/v1/production/uploads/624f909eac5dd186b01ac3f5/6tfvx3XT5Sx6YDGl7KUAU.jpeg", "internlm": "https://cdn-avatars.huggingface.co/v1/production/uploads/6432683407bad11484a68457/Q3Y0dL79GcsnaBCGRMooZ.png", "robbyant": "https://cdn-avatars.huggingface.co/v1/production/uploads/67aeffda7330db26f93cd62f/ZTuImney4XzRmBHyUL47F.png", "ai21labs": "https://cdn-avatars.huggingface.co/v1/production/uploads/67baf6e5489cb4dc98a4bff4/9Rkvk1VGhK1woxWvhqDyb.png", "moonshotai": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg", "TIGER-Lab": "https://cdn-avatars.huggingface.co/v1/production/uploads/6313a86154e6e5d9f0f94e04/Noi3Qq3RYz8Jdq6BaFteq.png", "tencent": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/Lp3m-XLpjQGwBItlvn69q.png", "nomic-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/641f01fe6d51620635e118e9/wy0ax27ok1-uHWoUAHSEs.png", "HelpingAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png", "Qwen": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png", "abr-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/68e56cb442da034c65305b1b/DYHvx48l1-zt0G5VRj95j.png", "rednote-hilab": "https://cdn-avatars.huggingface.co/v1/production/uploads/6807a1d6504547b3554b9c73/WgnnQDsz7FqnyTtv8mmRO.png", "MiniMaxAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg", "CohereLabs": "https://cdn-avatars.huggingface.co/v1/production/uploads/1678549441248-5e70f6048ce3c604d78fe133.png", "ibm-granite": "https://cdn-avatars.huggingface.co/v1/production/uploads/639bcaa2445b133a4e942436/CEW-OjXkRkDNmTxSu8Egh.png", "tiiuae": "https://cdn-avatars.huggingface.co/v1/production/uploads/61a8d1aac664736898ffc84f/AT6cAB5ZNwCcqFMal71WD.jpeg", "soundsgoodai": "https://www.gravatar.com/avatar/2e7e38eb40d5e4429d62544c73640cc6?d=retro&size=100", "ByteDance-Seed": "https://cdn-avatars.huggingface.co/v1/production/uploads/6535c9e88bde2fae19b6fb25/flkDUqd_YEuFsjeNET3r-.png", "Muse-research": "https://cdn-avatars.huggingface.co/v1/production/uploads/69625a973527f984b1d0cec1/sTKLCqiISaF3tmrm_G_cq.png", "kyutai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6355a3c1805be5a8f30fea49/8xGdIOlfkopZfhbMitw_k.jpeg", "baidu": "https://cdn-avatars.huggingface.co/v1/production/uploads/64f187a2cc1c03340ac30498/TYYUxK8xD1AxExFMWqbZD.png", "jdopensource": "https://cdn-avatars.huggingface.co/v1/production/uploads/68c0e2ab44ea28a974e3074b/g-4gTubd16qUtwmGZ0n4h.png", "openai": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png", "inspatio": "https://www.gravatar.com/avatar/0e9900671188e39a6c9589e882d99e9c?d=retro&size=100", "docling-project": "https://cdn-avatars.huggingface.co/v1/production/uploads/63c64dd877caf00391004e20/aWC70TyF2UhxyaUh1alpu.png", "stepfun-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/644f7e6233ac8f46fa0b9e26/CmF2ocXhkr2UtHXgmwq7-.png", "infly": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ed9862679c2cc40abb55d2/0n6g0jngiKkRjaEoAvPmM.png", "lightonai": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png", "mistralai": "https://cdn-avatars.huggingface.co/v1/production/uploads/634c17653d11eaedd88b314d/9OgyfKstSZtbmsmuG8MbU.png", "microsoft": "https://cdn-avatars.huggingface.co/v1/production/uploads/1583646260758-5e64858c87403103f9f1055d.png", "facebook": "https://cdn-avatars.huggingface.co/v1/production/uploads/1592839207516-noauth.png", "01-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6536187279f1de44b5e02d0f/-T8Xw0mX67_R73b7Re1y-.png", "BAAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/1664511063789-632c234f42c386ebd2710434.png" }, "colors": { "01-ai": "#6366f1", "AIDC-AI": "#0d9488", "BAAI": "#d97706", "ByteDance-Seed": "#e11d48", "CohereLabs": "#7c3aed", "EleutherAI": "#16a34a", "FINAL-Bench": "#2563eb", "GSAI-ML": "#ea580c", "HelpingAI": "#8b5cf6", "HuggingFaceTB": "#0891b2", "JGOS-Model": "#c026d3", "JetBrains": "#65a30d", "KDEGroup": "#dc2626", "LGAI-EXAONE": "#0284c7", "Lightricks": "#a21caf", "MiniMaxAI": "#059669", "MuVeraAI": "#9333ea", "Multilingual-Multimodal-NLP": "#ca8a04", "Muse-research": "#be185d", "PolarSeeker": "#0369a1", "Qwen": "#6366f1", "RedHatAI": "#0d9488", "Salesforce": "#d97706", "Snowflake": "#e11d48", "TIGER-Lab": "#7c3aed", "UsefulSensors": "#16a34a", "XiaomiMiMo": "#2563eb", "abacusai": "#ea580c", "abr-ai": "#8b5cf6", "ai21labs": "#0891b2", "baidu": "#c026d3", "datalab-to": "#65a30d", "deepseek-ai": "#dc2626", "distil-whisper": "#0284c7", "docling-project": "#a21caf", "espnet": "#059669", "facebook": "#9333ea", "google": "#ca8a04", "ibm-granite": "#be185d", "inclusionAI": "#0369a1", "infly": "#6366f1", "inspatio": "#0d9488", "internlm": "#d97706", "jdopensource": "#e11d48", "jinaai": "#7c3aed", "kyutai": "#16a34a", "lerobot": "#2563eb", "lightonai": "#ea580c", "meituan-longcat": "#8b5cf6", "meta-llama": "#0891b2", "microsoft": "#c026d3", "mistral-community": "#65a30d", "mistralai": "#dc2626", "moonshotai": "#0284c7", "mteb": "#a21caf", "nex-agi": "#059669", "nomic-ai": "#9333ea", "novelcore": "#ca8a04", "nvidia": "#be185d", "openai": "#0369a1", "openbmb": "#6366f1", "opendatalab": "#0d9488", "poolside": "#d97706", "rednote-hilab": "#e11d48", "robbyant": "#7c3aed", "sentence-transformers": "#16a34a", "soundsgoodai": "#2563eb", "speechbrain": "#ea580c", "stepfun-ai": "#8b5cf6", "tencent": "#0891b2", "tiiuae": "#c026d3", "zai-org": "#65a30d" }, "generated_at": "2026-06-08T08:00:35.704765+00:00" }