antoineedy committed on
Commit
baaa011
·
1 Parent(s): bf183ba

remove deprecated tabs

Browse files
Files changed (6) hide show
  1. .gitignore +2 -1
  2. app.py +230 -229
  3. app/utils.py +28 -27
  4. data/deprecated_model_handler.py +13 -3
  5. data/model_handler.py +23 -11
  6. results +1 -0
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  .venv
2
  *.json
3
- *.pyc
 
 
1
  .venv
2
  *.json
3
+ *.pyc
4
+ .DS_Store
app.py CHANGED
@@ -37,23 +37,23 @@ def main():
37
  num_models_2 = len(data_benchmark_2)
38
 
39
  # Get deprecated results
40
- deprecated_model_handler = DeprecatedModelHandler()
41
- initial_metric = "ndcg_at_5"
42
 
43
- deprecated_model_handler.get_vidore_data(initial_metric)
44
- deprecated_data_benchmark_1 = deprecated_model_handler.render_df(initial_metric, benchmark_version=1)
45
- deprecated_data_benchmark_1 = add_rank_and_format(deprecated_data_benchmark_1, benchmark_version=1)
46
 
47
- deprecated_data_benchmark_2 = deprecated_model_handler.render_df(initial_metric, benchmark_version=2)
48
- deprecated_data_benchmark_2 = add_rank_and_format(deprecated_data_benchmark_2, benchmark_version=2)
49
 
50
- deprecated_num_datasets_1 = len(deprecated_data_benchmark_1.columns) - 3
51
- deprecated_num_scores_1 = len(deprecated_data_benchmark_1) * deprecated_num_datasets_1
52
- deprecated_num_models_1 = len(deprecated_data_benchmark_1)
53
 
54
- deprecated_num_datasets_2 = len(deprecated_data_benchmark_2.columns) - 3
55
- deprecated_num_scores_2 = len(deprecated_data_benchmark_2) * deprecated_num_datasets_2
56
- deprecated_num_models_2 = len(deprecated_data_benchmark_2)
57
 
58
  css = """
59
  table > thead {
@@ -84,7 +84,7 @@ def main():
84
  gr.Markdown("# ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases 📚🔍")
85
  with gr.Row(variant="panel"):
86
  gr.Markdown("""
87
- ### ⚠️ To access the ViDoRe V3 results, please refer directly to the [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard).
88
  **ViDoRe V3 is fully integrated into MTEB, which provides a unified platform for evaluating embedding models across various tasks, including document retrieval.**
89
  **We decided to display ViDoRe V3 results directly on MTEB to leverage its extensive features and community.**
90
  """)
@@ -309,223 +309,224 @@ def main():
309
 
310
  ### Deprecated Tabs ###
311
 
312
- with gr.TabItem("⚠️ Deprecated ViDoRe V2"):
313
- gr.Markdown(
314
- "## <span style='color:red'>Deprecation notice: This leaderboard contains the results computed with the "
315
- "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, "
316
- "which is no longer maintained. Results should be computed using the "
317
- "[mteb](https://github.com/embeddings-benchmark/mteb) package as described "
318
- "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md).</span>"
319
- )
320
- gr.Markdown("## <span style='color:red'>Missing results in the new leaderboard are being added as they are re-computed.</span>")
321
- gr.Markdown("# <span style='color:red'>[Deprecated]</span> ViDoRe V2: A new visual Document Retrieval Benchmark 📚🔍")
322
- gr.Markdown("### A harder dataset benchmark for visual document retrieval 👀")
323
-
324
- gr.Markdown(
325
- """
326
- Visual Document Retrieval Benchmark 2 leaderboard. To submit results, refer to the corresponding tab.
327
-
328
- Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics and models.
329
- """
330
- )
331
- deprecated_datasets_columns_2 = list(deprecated_data_benchmark_2.columns[3:])
332
-
333
- with gr.Row():
334
- deprecated_metric_dropdown_2 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
335
- deprecated_research_textbox_2 = gr.Textbox(
336
- placeholder="🔍 Search Models... [press enter]",
337
- label="Filter Models by Name",
338
- )
339
- deprecated_column_checkboxes_2 = gr.CheckboxGroup(
340
- choices=deprecated_datasets_columns_2, value=deprecated_datasets_columns_2, label="Select Columns to Display"
341
- )
342
-
343
- with gr.Row():
344
- deprecated_datatype_2 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_2 + 1)
345
- deprecated_dataframe_2 = gr.Dataframe(deprecated_data_benchmark_2, datatype=deprecated_datatype_2, type="pandas")
346
-
347
- def deprecated_update_data_2(metric, search_term, selected_columns):
348
- deprecated_model_handler.get_vidore_data(metric)
349
- data = deprecated_model_handler.render_df(metric, benchmark_version=2)
350
- data = add_rank_and_format(data, benchmark_version=2, selected_columns=selected_columns)
351
- data = filter_models(data, search_term)
352
- # data = remove_duplicates(data) # Add this line
353
- if selected_columns:
354
- data = data[["Rank", "Model", "Average"] + selected_columns]
355
- return data
356
-
357
- with gr.Row():
358
- deprecated_refresh_button_2 = gr.Button("Refresh")
359
- deprecated_refresh_button_2.click(
360
- deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2),
361
- inputs=[deprecated_metric_dropdown_2],
362
- outputs=deprecated_dataframe_2,
363
- concurrency_limit=20,
364
- )
365
-
366
- with gr.Row():
367
- gr.Markdown(
368
- """
369
- **Note**: For now, all models were evaluated using the vidore-benchmark package and custom retrievers on our side.
370
- Those numbers are not numbers obtained from the organisations that released those models.
371
- """
372
- )
373
-
374
- # Automatically refresh the dataframe when the dropdown value changes
375
- deprecated_metric_dropdown_2.change(
376
- deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2),
377
- inputs=[deprecated_metric_dropdown_2],
378
- outputs=deprecated_dataframe_2,
379
- )
380
- deprecated_research_textbox_2.submit(
381
- lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns),
382
- inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2],
383
- outputs=deprecated_dataframe_2,
384
- )
385
- deprecated_column_checkboxes_2.change(
386
- lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns),
387
- inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2],
388
- outputs=deprecated_dataframe_2,
389
- )
390
-
391
- gr.Markdown(
392
- f"""
393
- - **Total Datasets**: {deprecated_num_datasets_2}
394
- - **Total Scores**: {deprecated_num_scores_2}
395
- - **Total Models**: {deprecated_num_models_2}
396
- """
397
- + r"""
398
- Please consider citing:
399
-
400
- ```bibtex
401
- @misc{faysse2024colpaliefficientdocumentretrieval,
402
- title={ColPali: Efficient Document Retrieval with Vision Language Models},
403
- author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
404
- year={2024},
405
- eprint={2407.01449},
406
- archivePrefix={arXiv},
407
- primaryClass={cs.IR},
408
- url={https://arxiv.org/abs/2407.01449},
409
- }
410
-
411
- @misc{macé2025vidorebenchmarkv2raising,
412
- title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
413
- author={Quentin Macé and António Loison and Manuel Faysse},
414
- year={2025},
415
- eprint={2505.17166},
416
- archivePrefix={arXiv},
417
- primaryClass={cs.IR},
418
- url={https://arxiv.org/abs/2505.17166},
419
- }
420
- ```
421
- """
422
- )
423
-
424
- with gr.TabItem("⚠️ Deprecated ViDoRe V1"):
425
- gr.Markdown(
426
- "## <span style='color:red'>Deprecation notice: This leaderboard contains the results computed with the "
427
- "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, "
428
- "which is no longer maintained. Results should be computed using the "
429
- "[mteb](https://github.com/embeddings-benchmark/mteb) package as described "
430
- "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md).</span>"
431
- )
432
- gr.Markdown("## <span style='color:red'>Missing results in the new leaderboard are being added as they are re-computed.</span>")
433
- gr.Markdown("# <span style='color:red'>[Deprecated]</span> ViDoRe: The Visual Document Retrieval Benchmark 1 📚🔍")
434
- gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")
435
-
436
- gr.Markdown(
437
- """
438
- Visual Document Retrieval Benchmark 1 leaderboard. To submit results, refer to the corresponding tab.
439
-
440
- Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
441
- """
442
- )
443
- deprecated_datasets_columns_1 = list(deprecated_data_benchmark_1.columns[3:])
444
-
445
- with gr.Row():
446
- deprecated_metric_dropdown_1 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
447
- deprecated_research_textbox_1 = gr.Textbox(
448
- placeholder="🔍 Search Models... [press enter]",
449
- label="Filter Models by Name",
450
- )
451
- deprecated_column_checkboxes_1 = gr.CheckboxGroup(
452
- choices=deprecated_datasets_columns_1, value=deprecated_datasets_columns_1, label="Select Columns to Display"
453
- )
454
-
455
- with gr.Row():
456
- deprecated_datatype_1 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_1 + 1)
457
- deprecated_dataframe_1 = gr.Dataframe(deprecated_data_benchmark_1, datatype=deprecated_datatype_1, type="pandas")
458
-
459
- def deprecated_update_data_1(metric, search_term, selected_columns):
460
- deprecated_model_handler.get_vidore_data(metric)
461
- data = deprecated_model_handler.render_df(metric, benchmark_version=1)
462
- data = add_rank_and_format(data, benchmark_version=1, selected_columns=selected_columns)
463
- data = filter_models(data, search_term)
464
- # data = remove_duplicates(data) # Add this line
465
- if selected_columns:
466
- data = data[["Rank", "Model", "Average"] + selected_columns]
467
- return data
468
-
469
- with gr.Row():
470
- deprecated_refresh_button_1 = gr.Button("Refresh")
471
- deprecated_refresh_button_1.click(
472
- deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1),
473
- inputs=[deprecated_metric_dropdown_1],
474
- outputs=deprecated_dataframe_1,
475
- concurrency_limit=20,
476
- )
477
-
478
- # Automatically refresh the dataframe when the dropdown value changes
479
- deprecated_metric_dropdown_1.change(
480
- deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1),
481
- inputs=[deprecated_metric_dropdown_1],
482
- outputs=deprecated_dataframe_1,
483
- )
484
- deprecated_research_textbox_1.submit(
485
- lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns),
486
- inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1],
487
- outputs=deprecated_dataframe_1,
488
- )
489
- deprecated_column_checkboxes_1.change(
490
- lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns),
491
- inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1],
492
- outputs=deprecated_dataframe_1,
493
- )
494
-
495
- gr.Markdown(
496
- f"""
497
- - **Total Datasets**: {deprecated_num_datasets_1}
498
- - **Total Scores**: {deprecated_num_scores_1}
499
- - **Total Models**: {deprecated_num_models_1}
500
- """
501
- + r"""
502
- Please consider citing:
503
-
504
- ```bibtex
505
- @misc{faysse2024colpaliefficientdocumentretrieval,
506
- title={ColPali: Efficient Document Retrieval with Vision Language Models},
507
- author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
508
- year={2024},
509
- eprint={2407.01449},
510
- archivePrefix={arXiv},
511
- primaryClass={cs.IR},
512
- url={https://arxiv.org/abs/2407.01449},
513
- }
514
-
515
- @misc{macé2025vidorebenchmarkv2raising,
516
- title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
517
- author={Quentin Macé and António Loison and Manuel Faysse},
518
- year={2025},
519
- eprint={2505.17166},
520
- archivePrefix={arXiv},
521
- primaryClass={cs.IR},
522
- url={https://arxiv.org/abs/2505.17166},
523
- }
524
- ```
525
- """
526
- )
527
 
528
  block.queue(max_size=10).launch(debug=True)
529
 
 
530
  if __name__ == "__main__":
531
  main()
 
37
  num_models_2 = len(data_benchmark_2)
38
 
39
  # Get deprecated results
40
+ # deprecated_model_handler = DeprecatedModelHandler()
41
+ # initial_metric = "ndcg_at_5"
42
 
43
+ # deprecated_model_handler.get_vidore_data(initial_metric)
44
+ # deprecated_data_benchmark_1 = deprecated_model_handler.render_df(initial_metric, benchmark_version=1)
45
+ # deprecated_data_benchmark_1 = add_rank_and_format(deprecated_data_benchmark_1, benchmark_version=1)
46
 
47
+ # deprecated_data_benchmark_2 = deprecated_model_handler.render_df(initial_metric, benchmark_version=2)
48
+ # deprecated_data_benchmark_2 = add_rank_and_format(deprecated_data_benchmark_2, benchmark_version=2)
49
 
50
+ # deprecated_num_datasets_1 = len(deprecated_data_benchmark_1.columns) - 3
51
+ # deprecated_num_scores_1 = len(deprecated_data_benchmark_1) * deprecated_num_datasets_1
52
+ # deprecated_num_models_1 = len(deprecated_data_benchmark_1)
53
 
54
+ # deprecated_num_datasets_2 = len(deprecated_data_benchmark_2.columns) - 3
55
+ # deprecated_num_scores_2 = len(deprecated_data_benchmark_2) * deprecated_num_datasets_2
56
+ # deprecated_num_models_2 = len(deprecated_data_benchmark_2)
57
 
58
  css = """
59
  table > thead {
 
84
  gr.Markdown("# ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases 📚🔍")
85
  with gr.Row(variant="panel"):
86
  gr.Markdown("""
87
+ ### ⚠️ To access the ViDoRe V3 results, please refer directly to the [MTEB Leaderboard](http://mteb-leaderboard.hf.space/?benchmark_name=ViDoRe%28v3%29).
88
  **ViDoRe V3 is fully integrated into MTEB, which provides a unified platform for evaluating embedding models across various tasks, including document retrieval.**
89
  **We decided to display ViDoRe V3 results directly on MTEB to leverage its extensive features and community.**
90
  """)
 
309
 
310
  ### Deprecated Tabs ###
311
 
312
+ # with gr.TabItem("⚠️ Deprecated ViDoRe V2"):
313
+ # gr.Markdown(
314
+ # "## <span style='color:red'>Deprecation notice: This leaderboard contains the results computed with the "
315
+ # "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, "
316
+ # "which is no longer maintained. Results should be computed using the "
317
+ # "[mteb](https://github.com/embeddings-benchmark/mteb) package as described "
318
+ # "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md).</span>"
319
+ # )
320
+ # gr.Markdown("## <span style='color:red'>Missing results in the new leaderboard are being added as they are re-computed.</span>")
321
+ # gr.Markdown("# <span style='color:red'>[Deprecated]</span> ViDoRe V2: A new visual Document Retrieval Benchmark 📚🔍")
322
+ # gr.Markdown("### A harder dataset benchmark for visual document retrieval 👀")
323
+
324
+ # gr.Markdown(
325
+ # """
326
+ # Visual Document Retrieval Benchmark 2 leaderboard. To submit results, refer to the corresponding tab.
327
+
328
+ # Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics and models.
329
+ # """
330
+ # )
331
+ # deprecated_datasets_columns_2 = list(deprecated_data_benchmark_2.columns[3:])
332
+
333
+ # with gr.Row():
334
+ # deprecated_metric_dropdown_2 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
335
+ # deprecated_research_textbox_2 = gr.Textbox(
336
+ # placeholder="🔍 Search Models... [press enter]",
337
+ # label="Filter Models by Name",
338
+ # )
339
+ # deprecated_column_checkboxes_2 = gr.CheckboxGroup(
340
+ # choices=deprecated_datasets_columns_2, value=deprecated_datasets_columns_2, label="Select Columns to Display"
341
+ # )
342
+
343
+ # with gr.Row():
344
+ # deprecated_datatype_2 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_2 + 1)
345
+ # deprecated_dataframe_2 = gr.Dataframe(deprecated_data_benchmark_2, datatype=deprecated_datatype_2, type="pandas")
346
+
347
+ # def deprecated_update_data_2(metric, search_term, selected_columns):
348
+ # deprecated_model_handler.get_vidore_data(metric)
349
+ # data = deprecated_model_handler.render_df(metric, benchmark_version=2)
350
+ # data = add_rank_and_format(data, benchmark_version=2, selected_columns=selected_columns)
351
+ # data = filter_models(data, search_term)
352
+ # # data = remove_duplicates(data) # Add this line
353
+ # if selected_columns:
354
+ # data = data[["Rank", "Model", "Average"] + selected_columns]
355
+ # return data
356
+
357
+ # with gr.Row():
358
+ # deprecated_refresh_button_2 = gr.Button("Refresh")
359
+ # deprecated_refresh_button_2.click(
360
+ # deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2),
361
+ # inputs=[deprecated_metric_dropdown_2],
362
+ # outputs=deprecated_dataframe_2,
363
+ # concurrency_limit=20,
364
+ # )
365
+
366
+ # with gr.Row():
367
+ # gr.Markdown(
368
+ # """
369
+ # **Note**: For now, all models were evaluated using the vidore-benchmark package and custom retrievers on our side.
370
+ # Those numbers are not numbers obtained from the organisations that released those models.
371
+ # """
372
+ # )
373
+
374
+ # # Automatically refresh the dataframe when the dropdown value changes
375
+ # deprecated_metric_dropdown_2.change(
376
+ # deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2),
377
+ # inputs=[deprecated_metric_dropdown_2],
378
+ # outputs=deprecated_dataframe_2,
379
+ # )
380
+ # deprecated_research_textbox_2.submit(
381
+ # lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns),
382
+ # inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2],
383
+ # outputs=deprecated_dataframe_2,
384
+ # )
385
+ # deprecated_column_checkboxes_2.change(
386
+ # lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns),
387
+ # inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2],
388
+ # outputs=deprecated_dataframe_2,
389
+ # )
390
+
391
+ # gr.Markdown(
392
+ # f"""
393
+ # - **Total Datasets**: {deprecated_num_datasets_2}
394
+ # - **Total Scores**: {deprecated_num_scores_2}
395
+ # - **Total Models**: {deprecated_num_models_2}
396
+ # """
397
+ # + r"""
398
+ # Please consider citing:
399
+
400
+ # ```bibtex
401
+ # @misc{faysse2024colpaliefficientdocumentretrieval,
402
+ # title={ColPali: Efficient Document Retrieval with Vision Language Models},
403
+ # author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
404
+ # year={2024},
405
+ # eprint={2407.01449},
406
+ # archivePrefix={arXiv},
407
+ # primaryClass={cs.IR},
408
+ # url={https://arxiv.org/abs/2407.01449},
409
+ # }
410
+
411
+ # @misc{macé2025vidorebenchmarkv2raising,
412
+ # title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
413
+ # author={Quentin Macé and António Loison and Manuel Faysse},
414
+ # year={2025},
415
+ # eprint={2505.17166},
416
+ # archivePrefix={arXiv},
417
+ # primaryClass={cs.IR},
418
+ # url={https://arxiv.org/abs/2505.17166},
419
+ # }
420
+ # ```
421
+ # """
422
+ # )
423
+
424
+ # with gr.TabItem("⚠️ Deprecated ViDoRe V1"):
425
+ # gr.Markdown(
426
+ # "## <span style='color:red'>Deprecation notice: This leaderboard contains the results computed with the "
427
+ # "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, "
428
+ # "which is no longer maintained. Results should be computed using the "
429
+ # "[mteb](https://github.com/embeddings-benchmark/mteb) package as described "
430
+ # "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md).</span>"
431
+ # )
432
+ # gr.Markdown("## <span style='color:red'>Missing results in the new leaderboard are being added as they are re-computed.</span>")
433
+ # gr.Markdown("# <span style='color:red'>[Deprecated]</span> ViDoRe: The Visual Document Retrieval Benchmark 1 📚🔍")
434
+ # gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")
435
+
436
+ # gr.Markdown(
437
+ # """
438
+ # Visual Document Retrieval Benchmark 1 leaderboard. To submit results, refer to the corresponding tab.
439
+
440
+ # Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
441
+ # """
442
+ # )
443
+ # deprecated_datasets_columns_1 = list(deprecated_data_benchmark_1.columns[3:])
444
+
445
+ # with gr.Row():
446
+ # deprecated_metric_dropdown_1 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
447
+ # deprecated_research_textbox_1 = gr.Textbox(
448
+ # placeholder="🔍 Search Models... [press enter]",
449
+ # label="Filter Models by Name",
450
+ # )
451
+ # deprecated_column_checkboxes_1 = gr.CheckboxGroup(
452
+ # choices=deprecated_datasets_columns_1, value=deprecated_datasets_columns_1, label="Select Columns to Display"
453
+ # )
454
+
455
+ # with gr.Row():
456
+ # deprecated_datatype_1 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_1 + 1)
457
+ # deprecated_dataframe_1 = gr.Dataframe(deprecated_data_benchmark_1, datatype=deprecated_datatype_1, type="pandas")
458
+
459
+ # def deprecated_update_data_1(metric, search_term, selected_columns):
460
+ # deprecated_model_handler.get_vidore_data(metric)
461
+ # data = deprecated_model_handler.render_df(metric, benchmark_version=1)
462
+ # data = add_rank_and_format(data, benchmark_version=1, selected_columns=selected_columns)
463
+ # data = filter_models(data, search_term)
464
+ # # data = remove_duplicates(data) # Add this line
465
+ # if selected_columns:
466
+ # data = data[["Rank", "Model", "Average"] + selected_columns]
467
+ # return data
468
+
469
+ # with gr.Row():
470
+ # deprecated_refresh_button_1 = gr.Button("Refresh")
471
+ # deprecated_refresh_button_1.click(
472
+ # deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1),
473
+ # inputs=[deprecated_metric_dropdown_1],
474
+ # outputs=deprecated_dataframe_1,
475
+ # concurrency_limit=20,
476
+ # )
477
+
478
+ # # Automatically refresh the dataframe when the dropdown value changes
479
+ # deprecated_metric_dropdown_1.change(
480
+ # deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1),
481
+ # inputs=[deprecated_metric_dropdown_1],
482
+ # outputs=deprecated_dataframe_1,
483
+ # )
484
+ # deprecated_research_textbox_1.submit(
485
+ # lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns),
486
+ # inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1],
487
+ # outputs=deprecated_dataframe_1,
488
+ # )
489
+ # deprecated_column_checkboxes_1.change(
490
+ # lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns),
491
+ # inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1],
492
+ # outputs=deprecated_dataframe_1,
493
+ # )
494
+
495
+ # gr.Markdown(
496
+ # f"""
497
+ # - **Total Datasets**: {deprecated_num_datasets_1}
498
+ # - **Total Scores**: {deprecated_num_scores_1}
499
+ # - **Total Models**: {deprecated_num_models_1}
500
+ # """
501
+ # + r"""
502
+ # Please consider citing:
503
+
504
+ # ```bibtex
505
+ # @misc{faysse2024colpaliefficientdocumentretrieval,
506
+ # title={ColPali: Efficient Document Retrieval with Vision Language Models},
507
+ # author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
508
+ # year={2024},
509
+ # eprint={2407.01449},
510
+ # archivePrefix={arXiv},
511
+ # primaryClass={cs.IR},
512
+ # url={https://arxiv.org/abs/2407.01449},
513
+ # }
514
+
515
+ # @misc{macé2025vidorebenchmarkv2raising,
516
+ # title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
517
+ # author={Quentin Macé and António Loison and Manuel Faysse},
518
+ # year={2025},
519
+ # eprint={2505.17166},
520
+ # archivePrefix={arXiv},
521
+ # primaryClass={cs.IR},
522
+ # url={https://arxiv.org/abs/2505.17166},
523
+ # }
524
+ # ```
525
+ # """
526
+ # )
527
 
528
  block.queue(max_size=10).launch(debug=True)
529
 
530
+
531
  if __name__ == "__main__":
532
  main()
app/utils.py CHANGED
@@ -18,34 +18,34 @@ def make_clickable_model(model_name, link=None):
18
 
19
 
20
  def add_rank(df, benchmark_version=1, selected_columns=None):
21
- df.fillna(0.0, inplace=True)
22
- if selected_columns is None:
23
- cols_to_rank = [
24
- col
25
- for col in df.columns
26
- if col
27
- not in [
28
- "Model",
29
- "Model Size (Million Parameters)",
30
- "Memory Usage (GB, fp32)",
31
- "Embedding Dimensions",
32
- "Max Tokens",
33
- ]
34
  ]
35
- else:
36
- cols_to_rank = selected_columns
37
-
38
- if len(cols_to_rank) == 1:
39
- df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
40
- else:
41
- df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
42
- df.sort_values("Average", ascending=False, inplace=True)
43
- df.insert(0, "Rank", list(range(1, len(df) + 1)))
44
- # multiply values by 100 if they are floats and round to 1 decimal place
45
- for col in df.columns:
46
- if df[col].dtype == "float64" and col != "Model Size (Million Parameters)":
47
- df[col] = df[col].apply(lambda x: round(x * 100, 1))
48
- return df
 
49
 
50
 
51
  def add_rank_and_format(df, benchmark_version=1, selected_columns=None):
@@ -74,6 +74,7 @@ def get_refresh_function(model_handler, benchmark_version):
74
 
75
  return _refresh
76
 
 
77
  def deprecated_get_refresh_function(model_handler, benchmark_version):
78
  def _refresh(metric):
79
  model_handler.get_vidore_data(metric)
 
18
 
19
 
20
  def add_rank(df, benchmark_version=1, selected_columns=None):
21
+ df.fillna(0.0, inplace=True)
22
+ if selected_columns is None:
23
+ cols_to_rank = [
24
+ col
25
+ for col in df.columns
26
+ if col
27
+ not in [
28
+ "Model",
29
+ "Model Size (Million Parameters)",
30
+ "Memory Usage (GB, fp32)",
31
+ "Embedding Dimensions",
32
+ "Max Tokens",
 
33
  ]
34
+ ]
35
+ else:
36
+ cols_to_rank = selected_columns
37
+
38
+ if len(cols_to_rank) == 1:
39
+ df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
40
+ else:
41
+ df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
42
+ df.sort_values("Average", ascending=False, inplace=True)
43
+ df.insert(0, "Rank", list(range(1, len(df) + 1)))
44
+ # multiply values by 100 if they are floats and round to 1 decimal place
45
+ for col in df.columns:
46
+ if df[col].dtype == "float64" and col != "Model Size (Million Parameters)":
47
+ df[col] = df[col].apply(lambda x: round(x * 100, 1))
48
+ return df
49
 
50
 
51
  def add_rank_and_format(df, benchmark_version=1, selected_columns=None):
 
74
 
75
  return _refresh
76
 
77
+
78
  def deprecated_get_refresh_function(model_handler, benchmark_version):
79
  def _refresh(metric):
80
  model_handler.get_vidore_data(metric)
data/deprecated_model_handler.py CHANGED
@@ -5,7 +5,11 @@ from typing import Any, Dict
5
  import pandas as pd
6
  from huggingface_hub import HfApi, hf_hub_download, metadata_load
7
 
8
- from .dataset_handler import DEPRECATED_VIDORE_2_DATASETS_KEYWORDS, DEPRECATED_VIDORE_DATASETS_KEYWORDS, deprecated_get_datasets_nickname
 
 
 
 
9
 
10
  BLOCKLIST = ["impactframes"]
11
 
@@ -92,7 +96,9 @@ class DeprecatedModelHandler:
92
  # In order to keep only models relevant to a benchmark
93
  def filter_models_by_benchmark(self, benchmark_version=1):
94
  filtered_model_infos = {}
95
- keywords = DEPRECATED_VIDORE_DATASETS_KEYWORDS if benchmark_version == 1 else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
 
 
96
 
97
  for model, info in self.model_infos.items():
98
  results = info["results"]
@@ -109,7 +115,11 @@ class DeprecatedModelHandler:
109
  for model in filtered_model_infos.keys():
110
  res = filtered_model_infos[model]["results"]
111
  dataset_res = {}
112
- keywords = DEPRECATED_VIDORE_DATASETS_KEYWORDS if benchmark_version == 1 else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
 
 
 
 
113
  for dataset in res.keys():
114
  if not any(keyword in dataset for keyword in keywords):
115
  continue
 
5
  import pandas as pd
6
  from huggingface_hub import HfApi, hf_hub_download, metadata_load
7
 
8
+ from .dataset_handler import (
9
+ DEPRECATED_VIDORE_2_DATASETS_KEYWORDS,
10
+ DEPRECATED_VIDORE_DATASETS_KEYWORDS,
11
+ deprecated_get_datasets_nickname,
12
+ )
13
 
14
  BLOCKLIST = ["impactframes"]
15
 
 
96
  # In order to keep only models relevant to a benchmark
97
  def filter_models_by_benchmark(self, benchmark_version=1):
98
  filtered_model_infos = {}
99
+ keywords = (
100
+ DEPRECATED_VIDORE_DATASETS_KEYWORDS if benchmark_version == 1 else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
101
+ )
102
 
103
  for model, info in self.model_infos.items():
104
  results = info["results"]
 
115
  for model in filtered_model_infos.keys():
116
  res = filtered_model_infos[model]["results"]
117
  dataset_res = {}
118
+ keywords = (
119
+ DEPRECATED_VIDORE_DATASETS_KEYWORDS
120
+ if benchmark_version == 1
121
+ else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
122
+ )
123
  for dataset in res.keys():
124
  if not any(keyword in dataset for keyword in keywords):
125
  continue
data/model_handler.py CHANGED
@@ -6,18 +6,14 @@ import pandas as pd
6
 
7
  from .dataset_handler import VIDORE_V1_MTEB_NAMES, VIDORE_V2_MTEB_NAMES, get_datasets_nickname
8
 
9
- class ModelHandler:
10
 
 
11
  def __init__(self):
12
  self.model_infos = {}
13
 
14
  @staticmethod
15
  def get_folders(dir_path):
16
- return sorted([
17
- path_
18
- for path_ in os.listdir(dir_path)
19
- if os.path.isdir(os.path.join(dir_path, path_))
20
- ])
21
 
22
  def get_vidore_data(self, metric="ndcg_at_5"):
23
  repo_url = "https://github.com/embeddings-benchmark/results.git"
@@ -37,22 +33,36 @@ class ModelHandler:
37
  first_revision = revisions[0]
38
  result_filenames = [
39
  result_filename
40
- for result_filename in os.listdir(os.path.join(local_path, folder_of_interest, model_name, first_revision))
 
 
41
  # if result_filename.endswith(".json") and result_filename != "model_meta.json"
42
  ]
43
  if "model_meta.json" in result_filenames:
44
- with open(os.path.join(local_path, folder_of_interest, model_name, first_revision, "model_meta.json"), "r") as f:
 
 
45
  meta = json.load(f)
46
  else:
47
  meta = {}
48
  results = {}
49
  if all(f"{v1_dataset_name}.json" in result_filenames for v1_dataset_name in VIDORE_V1_MTEB_NAMES):
50
  for v1_dataset_name in VIDORE_V1_MTEB_NAMES:
51
- with open(os.path.join(local_path, folder_of_interest, model_name, first_revision, f"{v1_dataset_name}.json"), "r") as f:
 
 
 
 
 
52
  results[v1_dataset_name] = json.load(f)
53
  if all(f"{v2_dataset_name}.json" in result_filenames for v2_dataset_name in VIDORE_V2_MTEB_NAMES):
54
  for v2_dataset_name in VIDORE_V2_MTEB_NAMES:
55
- with open(os.path.join(local_path, folder_of_interest, model_name, first_revision, f"{v2_dataset_name}.json"), "r") as f:
 
 
 
 
 
56
  results[v2_dataset_name] = json.load(f)
57
  if model_name not in self.model_infos:
58
  self.model_infos[model_name] = {}
@@ -79,7 +89,9 @@ class ModelHandler:
79
  keywords = VIDORE_V1_MTEB_NAMES if benchmark_version == 1 else VIDORE_V2_MTEB_NAMES
80
  if "n_parameters" in filtered_model_infos[model]["meta"]:
81
  try:
82
- dataset_res["Model Size (Million Parameters)"] = filtered_model_infos[model]["meta"]["n_parameters"] // 1_000_000
 
 
83
  except TypeError:
84
  dataset_res["Model Size (Million Parameters)"] = -1
85
  else:
 
6
 
7
  from .dataset_handler import VIDORE_V1_MTEB_NAMES, VIDORE_V2_MTEB_NAMES, get_datasets_nickname
8
 
 
9
 
10
+ class ModelHandler:
11
  def __init__(self):
12
  self.model_infos = {}
13
 
14
  @staticmethod
15
  def get_folders(dir_path):
16
+ return sorted([path_ for path_ in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, path_))])
 
 
 
 
17
 
18
  def get_vidore_data(self, metric="ndcg_at_5"):
19
  repo_url = "https://github.com/embeddings-benchmark/results.git"
 
33
  first_revision = revisions[0]
34
  result_filenames = [
35
  result_filename
36
+ for result_filename in os.listdir(
37
+ os.path.join(local_path, folder_of_interest, model_name, first_revision)
38
+ )
39
  # if result_filename.endswith(".json") and result_filename != "model_meta.json"
40
  ]
41
  if "model_meta.json" in result_filenames:
42
+ with open(
43
+ os.path.join(local_path, folder_of_interest, model_name, first_revision, "model_meta.json"), "r"
44
+ ) as f:
45
  meta = json.load(f)
46
  else:
47
  meta = {}
48
  results = {}
49
  if all(f"{v1_dataset_name}.json" in result_filenames for v1_dataset_name in VIDORE_V1_MTEB_NAMES):
50
  for v1_dataset_name in VIDORE_V1_MTEB_NAMES:
51
+ with open(
52
+ os.path.join(
53
+ local_path, folder_of_interest, model_name, first_revision, f"{v1_dataset_name}.json"
54
+ ),
55
+ "r",
56
+ ) as f:
57
  results[v1_dataset_name] = json.load(f)
58
  if all(f"{v2_dataset_name}.json" in result_filenames for v2_dataset_name in VIDORE_V2_MTEB_NAMES):
59
  for v2_dataset_name in VIDORE_V2_MTEB_NAMES:
60
+ with open(
61
+ os.path.join(
62
+ local_path, folder_of_interest, model_name, first_revision, f"{v2_dataset_name}.json"
63
+ ),
64
+ "r",
65
+ ) as f:
66
  results[v2_dataset_name] = json.load(f)
67
  if model_name not in self.model_infos:
68
  self.model_infos[model_name] = {}
 
89
  keywords = VIDORE_V1_MTEB_NAMES if benchmark_version == 1 else VIDORE_V2_MTEB_NAMES
90
  if "n_parameters" in filtered_model_infos[model]["meta"]:
91
  try:
92
+ dataset_res["Model Size (Million Parameters)"] = (
93
+ filtered_model_infos[model]["meta"]["n_parameters"] // 1_000_000
94
+ )
95
  except TypeError:
96
  dataset_res["Model Size (Million Parameters)"] = -1
97
  else:
results ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit a3903080f8067ae1b491dfafae34d4e40121bcbf