{"benchmark_id":"graphwalks-bfs-<128k","name":"Graphwalks BFS <128k","parent_benchmark":null,"categories":["reasoning","spatial_reasoning"],"modality":"text","multilingual":false,"max_score":1.0,"language":"en","description":"A graph reasoning benchmark that evaluates language models' ability to perform breadth-first search (BFS) operations on graphs with context length under 128k tokens, returning nodes reachable at specified depths.","paper_link":null,"implementation_link":null,"verified":false,"created_at":"2026-05-07T16:53:23.582677+00:00","updated_at":"2026-07-05T18:27:49.280738+00:00","statistics":{"total_models":11,"average_score":0.6621818181818181,"min_score":0.25,"max_score":0.94,"score_stddev":0.20970875908355296,"verified_count":0,"self_reported_count":11},"child_benchmarks":[],"linked_dataset":null,"models":[{"rank":1,"model_id":"gpt-5.2-2025-12-11","model_name":"GPT-5.2","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.94,"normalized_score":0.94,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/introducing-gpt-5-2/","analysis_method":"GPT-5.2 Thinking - GraphWalks BFS <128k.","verification_date":null,"provider_id":"openai","input_cost_per_million":1.75,"output_cost_per_million":14.0,"context_window":400000,"announcement_date":"2025-12-11","param_count":null,"is_open_source":false,"is_new":false,"best_latency":2.0,"latency_provider":"OpenAI","best_throughput":100.0,"throughput_provider":"OpenAI","context_provider":"OpenAI"},{"rank":2,"model_id":"gpt-5.4","model_name":"GPT-5.4","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.93,"normalized_score":0.93,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/introducing-gpt-5-4/","analysis_method":"Graphwalks BFS 0K-128K. Reasoning effort xhigh.","verification_date":null,"provider_id":"openai","input_cost_per_million":2.5,"output_cost_per_million":15.0,"context_window":1000000,"announcement_date":"2026-03-05","param_count":null,"is_open_source":false,"is_new":false,"best_latency":3.0,"latency_provider":"OpenAI","best_throughput":50.0,"throughput_provider":"OpenAI","context_provider":"OpenAI"},{"rank":3,"model_id":"gpt-5-2025-08-07","model_name":"GPT-5","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.783,"normalized_score":0.783,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/introducing-gpt-5-for-developers/","analysis_method":"Graphwalks BFS (<128k) long-context reasoning.","verification_date":null,"provider_id":null,"input_cost_per_million":null,"output_cost_per_million":null,"context_window":null,"announcement_date":"2025-08-07","param_count":null,"is_open_source":false,"is_new":false,"best_latency":null,"latency_provider":null,"best_throughput":null,"throughput_provider":null,"context_provider":null},{"rank":4,"model_id":"gpt-5.4-mini","model_name":"GPT-5.4 mini","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.763,"normalized_score":0.763,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/introducing-gpt-5-4-mini-and-nano/","analysis_method":"Graphwalks BFS 0K-128K. Reasoning effort xhigh.","verification_date":null,"provider_id":"openai","input_cost_per_million":0.75,"output_cost_per_million":4.5,"context_window":400000,"announcement_date":"2026-03-17","param_count":null,"is_open_source":false,"is_new":false,"best_latency":null,"latency_provider":"OpenAI","best_throughput":null,"throughput_provider":"OpenAI","context_provider":"OpenAI"},{"rank":5,"model_id":"gpt-5.4-nano","model_name":"GPT-5.4 nano","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.734,"normalized_score":0.734,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/introducing-gpt-5-4-mini-and-nano/","analysis_method":"Graphwalks BFS 0K-128K. Reasoning effort xhigh.","verification_date":null,"provider_id":"openai","input_cost_per_million":0.2,"output_cost_per_million":1.25,"context_window":400000,"announcement_date":"2026-03-17","param_count":null,"is_open_source":false,"is_new":false,"best_latency":null,"latency_provider":"OpenAI","best_throughput":null,"throughput_provider":"OpenAI","context_provider":"OpenAI"},{"rank":6,"model_id":"gpt-4.5","model_name":"GPT-4.5","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.723,"normalized_score":0.723,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/gpt-4-1/","analysis_method":"Accuracy","verification_date":null,"provider_id":null,"input_cost_per_million":null,"output_cost_per_million":null,"context_window":null,"announcement_date":"2025-02-27","param_count":null,"is_open_source":false,"is_new":false,"best_latency":null,"latency_provider":null,"best_throughput":null,"throughput_provider":null,"context_provider":null},{"rank":7,"model_id":"gpt-4.1-2025-04-14","model_name":"GPT-4.1","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.617,"normalized_score":0.617,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/introducing-gpt-5-for-developers/","analysis_method":"Standard benchmark","verification_date":null,"provider_id":"openai","input_cost_per_million":2.0,"output_cost_per_million":8.0,"context_window":1047576,"announcement_date":"2025-04-14","param_count":null,"is_open_source":false,"is_new":false,"best_latency":10.0,"latency_provider":"OpenAI","best_throughput":100.0,"throughput_provider":"OpenAI","context_provider":"OpenAI"},{"rank":7,"model_id":"gpt-4.1-mini-2025-04-14","model_name":"GPT-4.1 mini","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.617,"normalized_score":0.617,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/introducing-gpt-5-for-developers/","analysis_method":"Standard benchmark","verification_date":null,"provider_id":"openai","input_cost_per_million":0.4,"output_cost_per_million":1.6,"context_window":1047576,"announcement_date":"2025-04-14","param_count":null,"is_open_source":false,"is_new":false,"best_latency":5.0,"latency_provider":"OpenAI","best_throughput":150.0,"throughput_provider":"OpenAI","context_provider":"OpenAI"},{"rank":9,"model_id":"o3-mini","model_name":"o3-mini","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.51,"normalized_score":0.51,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/gpt-4-1/","analysis_method":"benchmark score","verification_date":null,"provider_id":null,"input_cost_per_million":null,"output_cost_per_million":null,"context_window":null,"announcement_date":"2025-01-30","param_count":null,"is_open_source":false,"is_new":false,"best_latency":null,"latency_provider":null,"best_throughput":null,"throughput_provider":null,"context_provider":null},{"rank":10,"model_id":"gpt-4o-2024-08-06","model_name":"GPT-4o","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.417,"normalized_score":0.417,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/gpt-4-1/","analysis_method":"Accuracy","verification_date":null,"provider_id":"openai","input_cost_per_million":2.5,"output_cost_per_million":10.0,"context_window":128000,"announcement_date":"2024-08-06","param_count":null,"is_open_source":false,"is_new":false,"best_latency":0.5,"latency_provider":"OpenAI","best_throughput":132.0,"throughput_provider":"OpenAI","context_provider":"OpenAI"},{"rank":11,"model_id":"gpt-4.1-nano-2025-04-14","model_name":"GPT-4.1 nano","organization_id":"openai","organization_name":"OpenAI","organization_country":"US","score":0.25,"normalized_score":0.25,"verified":false,"self_reported":true,"self_reported_source":"https://openai.com/index/gpt-4-1/","analysis_method":"Standard benchmark","verification_date":null,"provider_id":"openai","input_cost_per_million":0.1,"output_cost_per_million":0.4,"context_window":1047576,"announcement_date":"2025-04-14","param_count":null,"is_open_source":false,"is_new":false,"best_latency":2.0,"latency_provider":"OpenAI","best_throughput":200.0,"throughput_provider":"OpenAI","context_provider":"OpenAI"}]}