Spaces:

vidore
/

vidore-leaderboard

Running

App Files Community

antoineedy committed on Nov 18, 2025

Commit

baaa011

1 Parent(s): bf183ba

remove deprecated tabs

Browse files

Files changed (6) hide show

.gitignore +2 -1
app.py +230 -229
app/utils.py +28 -27
data/deprecated_model_handler.py +13 -3
data/model_handler.py +23 -11
results +1 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
 .venv
 *.json
-*.pyc

 .venv
 *.json
+*.pyc
+.DS_Store

app.py CHANGED Viewed

@@ -37,23 +37,23 @@ def main():
     num_models_2 = len(data_benchmark_2)
     # Get deprecated results
-    deprecated_model_handler = DeprecatedModelHandler()
-    initial_metric = "ndcg_at_5"
-    deprecated_model_handler.get_vidore_data(initial_metric)
-    deprecated_data_benchmark_1 = deprecated_model_handler.render_df(initial_metric, benchmark_version=1)
-    deprecated_data_benchmark_1 = add_rank_and_format(deprecated_data_benchmark_1, benchmark_version=1)
-    deprecated_data_benchmark_2 = deprecated_model_handler.render_df(initial_metric, benchmark_version=2)
-    deprecated_data_benchmark_2 = add_rank_and_format(deprecated_data_benchmark_2, benchmark_version=2)
-    deprecated_num_datasets_1 = len(deprecated_data_benchmark_1.columns) - 3
-    deprecated_num_scores_1 = len(deprecated_data_benchmark_1) * deprecated_num_datasets_1
-    deprecated_num_models_1 = len(deprecated_data_benchmark_1)
-    deprecated_num_datasets_2 = len(deprecated_data_benchmark_2.columns) - 3
-    deprecated_num_scores_2 = len(deprecated_data_benchmark_2) * deprecated_num_datasets_2
-    deprecated_num_models_2 = len(deprecated_data_benchmark_2)
     css = """
     table > thead {
@@ -84,7 +84,7 @@ def main():
                 gr.Markdown("# ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases 📚🔍")
                 with gr.Row(variant="panel"):
                     gr.Markdown("""
-                                ### ⚠️ To access the ViDoRe V3 results, please refer directly to the [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard).
                                 **ViDoRe V3 is fully integrated into MTEB, which provides a unified platform for evaluating embedding models across various tasks, including document retrieval.**
                                 **We decided to display ViDoRe V3 results directly on MTEB to leverage its extensive features and community.**
                                 """)
@@ -309,223 +309,224 @@ def main():
             ### Deprecated Tabs ###
-            with gr.TabItem("⚠️ Deprecated ViDoRe V2"):
-                gr.Markdown(
-                    "## <span style='color:red'>Deprecation notice: This leaderboard contains the results computed with the "
-                    "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, "
-                    "which is no longer maintained. Results should be computed using the "
-                    "[mteb](https://github.com/embeddings-benchmark/mteb) package as described "
-                    "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md).</span>"
-                )
-                gr.Markdown("## <span style='color:red'>Missing results in the new leaderboard are being added as they are re-computed.</span>")
-                gr.Markdown("# <span style='color:red'>[Deprecated]</span> ViDoRe V2: A new visual Document Retrieval Benchmark 📚🔍")
-                gr.Markdown("### A harder dataset benchmark for visual document retrieval 👀")
-                gr.Markdown(
-                    """
-                Visual Document Retrieval Benchmark 2 leaderboard. To submit results, refer to the corresponding tab.
-                Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics and models.
-                """
-                )
-                deprecated_datasets_columns_2 = list(deprecated_data_benchmark_2.columns[3:])
-                with gr.Row():
-                    deprecated_metric_dropdown_2 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
-                    deprecated_research_textbox_2 = gr.Textbox(
-                        placeholder="🔍 Search Models... [press enter]",
-                        label="Filter Models by Name",
-                    )
-                    deprecated_column_checkboxes_2 = gr.CheckboxGroup(
-                        choices=deprecated_datasets_columns_2, value=deprecated_datasets_columns_2, label="Select Columns to Display"
-                    )
-                with gr.Row():
-                    deprecated_datatype_2 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_2 + 1)
-                    deprecated_dataframe_2 = gr.Dataframe(deprecated_data_benchmark_2, datatype=deprecated_datatype_2, type="pandas")
-                def deprecated_update_data_2(metric, search_term, selected_columns):
-                    deprecated_model_handler.get_vidore_data(metric)
-                    data = deprecated_model_handler.render_df(metric, benchmark_version=2)
-                    data = add_rank_and_format(data, benchmark_version=2, selected_columns=selected_columns)
-                    data = filter_models(data, search_term)
-                    # data = remove_duplicates(data)  # Add this line
-                    if selected_columns:
-                        data = data[["Rank", "Model", "Average"] + selected_columns]
-                    return data
-                with gr.Row():
-                    deprecated_refresh_button_2 = gr.Button("Refresh")
-                    deprecated_refresh_button_2.click(
-                        deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2),
-                        inputs=[deprecated_metric_dropdown_2],
-                        outputs=deprecated_dataframe_2,
-                        concurrency_limit=20,
-                    )
-                with gr.Row():
-                    gr.Markdown(
-                        """
-                    **Note**: For now, all models were evaluated using the vidore-benchmark package and custom retrievers on our side.
-                    Those numbers are not numbers obtained from the organisations that released those models.
-                    """
-                    )
-                # Automatically refresh the dataframe when the dropdown value changes
-                deprecated_metric_dropdown_2.change(
-                    deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2),
-                    inputs=[deprecated_metric_dropdown_2],
-                    outputs=deprecated_dataframe_2,
-                )
-                deprecated_research_textbox_2.submit(
-                    lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns),
-                    inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2],
-                    outputs=deprecated_dataframe_2,
-                )
-                deprecated_column_checkboxes_2.change(
-                    lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns),
-                    inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2],
-                    outputs=deprecated_dataframe_2,
-                )
-                gr.Markdown(
-                    f"""
-                - **Total Datasets**: {deprecated_num_datasets_2}
-                - **Total Scores**: {deprecated_num_scores_2}
-                - **Total Models**: {deprecated_num_models_2}
-                """
-                    + r"""
-                Please consider citing:
-                ```bibtex
-                @misc{faysse2024colpaliefficientdocumentretrieval,
-                  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-                  author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
-                  year={2024},
-                  eprint={2407.01449},
-                  archivePrefix={arXiv},
-                  primaryClass={cs.IR},
-                  url={https://arxiv.org/abs/2407.01449},
-                }
-                @misc{macé2025vidorebenchmarkv2raising,
-                      title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-                      author={Quentin Macé and António Loison and Manuel Faysse},
-                      year={2025},
-                      eprint={2505.17166},
-                      archivePrefix={arXiv},
-                      primaryClass={cs.IR},
-                      url={https://arxiv.org/abs/2505.17166},
-                }
-                ```
-                """
-                )
-            with gr.TabItem("⚠️ Deprecated ViDoRe V1"):
-                gr.Markdown(
-                    "## <span style='color:red'>Deprecation notice: This leaderboard contains the results computed with the "
-                    "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, "
-                    "which is no longer maintained. Results should be computed using the "
-                    "[mteb](https://github.com/embeddings-benchmark/mteb) package as described "
-                    "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md).</span>"
-                )
-                gr.Markdown("## <span style='color:red'>Missing results in the new leaderboard are being added as they are re-computed.</span>")
-                gr.Markdown("# <span style='color:red'>[Deprecated]</span> ViDoRe: The Visual Document Retrieval Benchmark 1 📚🔍")
-                gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")
-                gr.Markdown(
-                    """
-                Visual Document Retrieval Benchmark 1 leaderboard. To submit results, refer to the corresponding tab.
-                Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
-                """
-                )
-                deprecated_datasets_columns_1 = list(deprecated_data_benchmark_1.columns[3:])
-                with gr.Row():
-                    deprecated_metric_dropdown_1 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
-                    deprecated_research_textbox_1 = gr.Textbox(
-                        placeholder="🔍 Search Models... [press enter]",
-                        label="Filter Models by Name",
-                    )
-                    deprecated_column_checkboxes_1 = gr.CheckboxGroup(
-                        choices=deprecated_datasets_columns_1, value=deprecated_datasets_columns_1, label="Select Columns to Display"
-                    )
-                with gr.Row():
-                    deprecated_datatype_1 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_1 + 1)
-                    deprecated_dataframe_1 = gr.Dataframe(deprecated_data_benchmark_1, datatype=deprecated_datatype_1, type="pandas")
-                def deprecated_update_data_1(metric, search_term, selected_columns):
-                    deprecated_model_handler.get_vidore_data(metric)
-                    data = deprecated_model_handler.render_df(metric, benchmark_version=1)
-                    data = add_rank_and_format(data, benchmark_version=1, selected_columns=selected_columns)
-                    data = filter_models(data, search_term)
-                    # data = remove_duplicates(data)  # Add this line
-                    if selected_columns:
-                        data = data[["Rank", "Model", "Average"] + selected_columns]
-                    return data
-                with gr.Row():
-                    deprecated_refresh_button_1 = gr.Button("Refresh")
-                    deprecated_refresh_button_1.click(
-                        deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1),
-                        inputs=[deprecated_metric_dropdown_1],
-                        outputs=deprecated_dataframe_1,
-                        concurrency_limit=20,
-                    )
-                # Automatically refresh the dataframe when the dropdown value changes
-                deprecated_metric_dropdown_1.change(
-                    deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1),
-                    inputs=[deprecated_metric_dropdown_1],
-                    outputs=deprecated_dataframe_1,
-                )
-                deprecated_research_textbox_1.submit(
-                    lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns),
-                    inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1],
-                    outputs=deprecated_dataframe_1,
-                )
-                deprecated_column_checkboxes_1.change(
-                    lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns),
-                    inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1],
-                    outputs=deprecated_dataframe_1,
-                )
-                gr.Markdown(
-                    f"""
-                - **Total Datasets**: {deprecated_num_datasets_1}
-                - **Total Scores**: {deprecated_num_scores_1}
-                - **Total Models**: {deprecated_num_models_1}
-                """
-                    + r"""
-                Please consider citing:
-                ```bibtex
-                @misc{faysse2024colpaliefficientdocumentretrieval,
-                  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-                  author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
-                  year={2024},
-                  eprint={2407.01449},
-                  archivePrefix={arXiv},
-                  primaryClass={cs.IR},
-                  url={https://arxiv.org/abs/2407.01449},
-                }
-                @misc{macé2025vidorebenchmarkv2raising,
-                  title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-                  author={Quentin Macé and António Loison and Manuel Faysse},
-                  year={2025},
-                  eprint={2505.17166},
-                  archivePrefix={arXiv},
-                  primaryClass={cs.IR},
-                  url={https://arxiv.org/abs/2505.17166},
-                }
-                ```
-                """
-                )
     block.queue(max_size=10).launch(debug=True)
 if __name__ == "__main__":
     main()

     num_models_2 = len(data_benchmark_2)
     # Get deprecated results
+    # deprecated_model_handler = DeprecatedModelHandler()
+    # initial_metric = "ndcg_at_5"
+    # deprecated_model_handler.get_vidore_data(initial_metric)
+    # deprecated_data_benchmark_1 = deprecated_model_handler.render_df(initial_metric, benchmark_version=1)
+    # deprecated_data_benchmark_1 = add_rank_and_format(deprecated_data_benchmark_1, benchmark_version=1)
+    # deprecated_data_benchmark_2 = deprecated_model_handler.render_df(initial_metric, benchmark_version=2)
+    # deprecated_data_benchmark_2 = add_rank_and_format(deprecated_data_benchmark_2, benchmark_version=2)
+    # deprecated_num_datasets_1 = len(deprecated_data_benchmark_1.columns) - 3
+    # deprecated_num_scores_1 = len(deprecated_data_benchmark_1) * deprecated_num_datasets_1
+    # deprecated_num_models_1 = len(deprecated_data_benchmark_1)
+    # deprecated_num_datasets_2 = len(deprecated_data_benchmark_2.columns) - 3
+    # deprecated_num_scores_2 = len(deprecated_data_benchmark_2) * deprecated_num_datasets_2
+    # deprecated_num_models_2 = len(deprecated_data_benchmark_2)
     css = """
     table > thead {
                 gr.Markdown("# ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases 📚🔍")
                 with gr.Row(variant="panel"):
                     gr.Markdown("""
+                                ### ⚠️ To access the ViDoRe V3 results, please refer directly to the [MTEB Leaderboard](http://mteb-leaderboard.hf.space/?benchmark_name=ViDoRe%28v3%29).
                                 **ViDoRe V3 is fully integrated into MTEB, which provides a unified platform for evaluating embedding models across various tasks, including document retrieval.**
                                 **We decided to display ViDoRe V3 results directly on MTEB to leverage its extensive features and community.**
                                 """)
             ### Deprecated Tabs ###
+            # with gr.TabItem("⚠️ Deprecated ViDoRe V2"):
+            #     gr.Markdown(
+            #         "## <span style='color:red'>Deprecation notice: This leaderboard contains the results computed with the "
+            #         "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, "
+            #         "which is no longer maintained. Results should be computed using the "
+            #         "[mteb](https://github.com/embeddings-benchmark/mteb) package as described "
+            #         "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md).</span>"
+            #     )
+            #     gr.Markdown("## <span style='color:red'>Missing results in the new leaderboard are being added as they are re-computed.</span>")
+            #     gr.Markdown("# <span style='color:red'>[Deprecated]</span> ViDoRe V2: A new visual Document Retrieval Benchmark 📚🔍")
+            #     gr.Markdown("### A harder dataset benchmark for visual document retrieval 👀")
+            #     gr.Markdown(
+            #         """
+            #     Visual Document Retrieval Benchmark 2 leaderboard. To submit results, refer to the corresponding tab.
+            #     Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics and models.
+            #     """
+            #     )
+            #     deprecated_datasets_columns_2 = list(deprecated_data_benchmark_2.columns[3:])
+            #     with gr.Row():
+            #         deprecated_metric_dropdown_2 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
+            #         deprecated_research_textbox_2 = gr.Textbox(
+            #             placeholder="🔍 Search Models... [press enter]",
+            #             label="Filter Models by Name",
+            #         )
+            #         deprecated_column_checkboxes_2 = gr.CheckboxGroup(
+            #             choices=deprecated_datasets_columns_2, value=deprecated_datasets_columns_2, label="Select Columns to Display"
+            #         )
+            #     with gr.Row():
+            #         deprecated_datatype_2 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_2 + 1)
+            #         deprecated_dataframe_2 = gr.Dataframe(deprecated_data_benchmark_2, datatype=deprecated_datatype_2, type="pandas")
+            #     def deprecated_update_data_2(metric, search_term, selected_columns):
+            #         deprecated_model_handler.get_vidore_data(metric)
+            #         data = deprecated_model_handler.render_df(metric, benchmark_version=2)
+            #         data = add_rank_and_format(data, benchmark_version=2, selected_columns=selected_columns)
+            #         data = filter_models(data, search_term)
+            #         # data = remove_duplicates(data)  # Add this line
+            #         if selected_columns:
+            #             data = data[["Rank", "Model", "Average"] + selected_columns]
+            #         return data
+            #     with gr.Row():
+            #         deprecated_refresh_button_2 = gr.Button("Refresh")
+            #         deprecated_refresh_button_2.click(
+            #             deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2),
+            #             inputs=[deprecated_metric_dropdown_2],
+            #             outputs=deprecated_dataframe_2,
+            #             concurrency_limit=20,
+            #         )
+            #     with gr.Row():
+            #         gr.Markdown(
+            #             """
+            #         **Note**: For now, all models were evaluated using the vidore-benchmark package and custom retrievers on our side.
+            #         Those numbers are not numbers obtained from the organisations that released those models.
+            #         """
+            #         )
+            #     # Automatically refresh the dataframe when the dropdown value changes
+            #     deprecated_metric_dropdown_2.change(
+            #         deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2),
+            #         inputs=[deprecated_metric_dropdown_2],
+            #         outputs=deprecated_dataframe_2,
+            #     )
+            #     deprecated_research_textbox_2.submit(
+            #         lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns),
+            #         inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2],
+            #         outputs=deprecated_dataframe_2,
+            #     )
+            #     deprecated_column_checkboxes_2.change(
+            #         lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns),
+            #         inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2],
+            #         outputs=deprecated_dataframe_2,
+            #     )
+            #     gr.Markdown(
+            #         f"""
+            #     - **Total Datasets**: {deprecated_num_datasets_2}
+            #     - **Total Scores**: {deprecated_num_scores_2}
+            #     - **Total Models**: {deprecated_num_models_2}
+            #     """
+            #         + r"""
+            #     Please consider citing:
+            #     ```bibtex
+            #     @misc{faysse2024colpaliefficientdocumentretrieval,
+            #       title={ColPali: Efficient Document Retrieval with Vision Language Models},
+            #       author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
+            #       year={2024},
+            #       eprint={2407.01449},
+            #       archivePrefix={arXiv},
+            #       primaryClass={cs.IR},
+            #       url={https://arxiv.org/abs/2407.01449},
+            #     }
+            #     @misc{macé2025vidorebenchmarkv2raising,
+            #           title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+            #           author={Quentin Macé and António Loison and Manuel Faysse},
+            #           year={2025},
+            #           eprint={2505.17166},
+            #           archivePrefix={arXiv},
+            #           primaryClass={cs.IR},
+            #           url={https://arxiv.org/abs/2505.17166},
+            #     }
+            #     ```
+            #     """
+            #     )
+            # with gr.TabItem("⚠️ Deprecated ViDoRe V1"):
+            #     gr.Markdown(
+            #         "## <span style='color:red'>Deprecation notice: This leaderboard contains the results computed with the "
+            #         "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, "
+            #         "which is no longer maintained. Results should be computed using the "
+            #         "[mteb](https://github.com/embeddings-benchmark/mteb) package as described "
+            #         "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md).</span>"
+            #     )
+            #     gr.Markdown("## <span style='color:red'>Missing results in the new leaderboard are being added as they are re-computed.</span>")
+            #     gr.Markdown("# <span style='color:red'>[Deprecated]</span> ViDoRe: The Visual Document Retrieval Benchmark 1 📚🔍")
+            #     gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")
+            #     gr.Markdown(
+            #         """
+            #     Visual Document Retrieval Benchmark 1 leaderboard. To submit results, refer to the corresponding tab.
+            #     Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
+            #     """
+            #     )
+            #     deprecated_datasets_columns_1 = list(deprecated_data_benchmark_1.columns[3:])
+            #     with gr.Row():
+            #         deprecated_metric_dropdown_1 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
+            #         deprecated_research_textbox_1 = gr.Textbox(
+            #             placeholder="🔍 Search Models... [press enter]",
+            #             label="Filter Models by Name",
+            #         )
+            #         deprecated_column_checkboxes_1 = gr.CheckboxGroup(
+            #             choices=deprecated_datasets_columns_1, value=deprecated_datasets_columns_1, label="Select Columns to Display"
+            #         )
+            #     with gr.Row():
+            #         deprecated_datatype_1 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_1 + 1)
+            #         deprecated_dataframe_1 = gr.Dataframe(deprecated_data_benchmark_1, datatype=deprecated_datatype_1, type="pandas")
+            #     def deprecated_update_data_1(metric, search_term, selected_columns):
+            #         deprecated_model_handler.get_vidore_data(metric)
+            #         data = deprecated_model_handler.render_df(metric, benchmark_version=1)
+            #         data = add_rank_and_format(data, benchmark_version=1, selected_columns=selected_columns)
+            #         data = filter_models(data, search_term)
+            #         # data = remove_duplicates(data)  # Add this line
+            #         if selected_columns:
+            #             data = data[["Rank", "Model", "Average"] + selected_columns]
+            #         return data
+            #     with gr.Row():
+            #         deprecated_refresh_button_1 = gr.Button("Refresh")
+            #         deprecated_refresh_button_1.click(
+            #             deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1),
+            #             inputs=[deprecated_metric_dropdown_1],
+            #             outputs=deprecated_dataframe_1,
+            #             concurrency_limit=20,
+            #         )
+            #     # Automatically refresh the dataframe when the dropdown value changes
+            #     deprecated_metric_dropdown_1.change(
+            #         deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1),
+            #         inputs=[deprecated_metric_dropdown_1],
+            #         outputs=deprecated_dataframe_1,
+            #     )
+            #     deprecated_research_textbox_1.submit(
+            #         lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns),
+            #         inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1],
+            #         outputs=deprecated_dataframe_1,
+            #     )
+            #     deprecated_column_checkboxes_1.change(
+            #         lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns),
+            #         inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1],
+            #         outputs=deprecated_dataframe_1,
+            #     )
+            #     gr.Markdown(
+            #         f"""
+            #     - **Total Datasets**: {deprecated_num_datasets_1}
+            #     - **Total Scores**: {deprecated_num_scores_1}
+            #     - **Total Models**: {deprecated_num_models_1}
+            #     """
+            #         + r"""
+            #     Please consider citing:
+            #     ```bibtex
+            #     @misc{faysse2024colpaliefficientdocumentretrieval,
+            #       title={ColPali: Efficient Document Retrieval with Vision Language Models},
+            #       author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
+            #       year={2024},
+            #       eprint={2407.01449},
+            #       archivePrefix={arXiv},
+            #       primaryClass={cs.IR},
+            #       url={https://arxiv.org/abs/2407.01449},
+            #     }
+            #     @misc{macé2025vidorebenchmarkv2raising,
+            #       title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+            #       author={Quentin Macé and António Loison and Manuel Faysse},
+            #       year={2025},
+            #       eprint={2505.17166},
+            #       archivePrefix={arXiv},
+            #       primaryClass={cs.IR},
+            #       url={https://arxiv.org/abs/2505.17166},
+            #     }
+            #     ```
+            #     """
+            #     )
     block.queue(max_size=10).launch(debug=True)
 if __name__ == "__main__":
     main()

app/utils.py CHANGED Viewed

@@ -18,34 +18,34 @@ def make_clickable_model(model_name, link=None):
 def add_rank(df, benchmark_version=1, selected_columns=None):
-        df.fillna(0.0, inplace=True)
-        if selected_columns is None:
-            cols_to_rank = [
-                col
-                for col in df.columns
-                if col
-                not in [
-                    "Model",
-                    "Model Size (Million Parameters)",
-                    "Memory Usage (GB, fp32)",
-                    "Embedding Dimensions",
-                    "Max Tokens",
-                ]
             ]
-        else:
-            cols_to_rank = selected_columns
-        if len(cols_to_rank) == 1:
-            df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
-        else:
-            df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
-            df.sort_values("Average", ascending=False, inplace=True)
-        df.insert(0, "Rank", list(range(1, len(df) + 1)))
-        # multiply values by 100 if they are floats and round to 1 decimal place
-        for col in df.columns:
-            if df[col].dtype == "float64" and col != "Model Size (Million Parameters)":
-                df[col] = df[col].apply(lambda x: round(x * 100, 1))
-        return df
 def add_rank_and_format(df, benchmark_version=1, selected_columns=None):
@@ -74,6 +74,7 @@ def get_refresh_function(model_handler, benchmark_version):
     return _refresh
 def deprecated_get_refresh_function(model_handler, benchmark_version):
     def _refresh(metric):
         model_handler.get_vidore_data(metric)

 def add_rank(df, benchmark_version=1, selected_columns=None):
+    df.fillna(0.0, inplace=True)
+    if selected_columns is None:
+        cols_to_rank = [
+            col
+            for col in df.columns
+            if col
+            not in [
+                "Model",
+                "Model Size (Million Parameters)",
+                "Memory Usage (GB, fp32)",
+                "Embedding Dimensions",
+                "Max Tokens",
             ]
+        ]
+    else:
+        cols_to_rank = selected_columns
+    if len(cols_to_rank) == 1:
+        df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
+    else:
+        df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
+        df.sort_values("Average", ascending=False, inplace=True)
+    df.insert(0, "Rank", list(range(1, len(df) + 1)))
+    # multiply values by 100 if they are floats and round to 1 decimal place
+    for col in df.columns:
+        if df[col].dtype == "float64" and col != "Model Size (Million Parameters)":
+            df[col] = df[col].apply(lambda x: round(x * 100, 1))
+    return df
 def add_rank_and_format(df, benchmark_version=1, selected_columns=None):
     return _refresh
 def deprecated_get_refresh_function(model_handler, benchmark_version):
     def _refresh(metric):
         model_handler.get_vidore_data(metric)

data/deprecated_model_handler.py CHANGED Viewed

@@ -5,7 +5,11 @@ from typing import Any, Dict
 import pandas as pd
 from huggingface_hub import HfApi, hf_hub_download, metadata_load
-from .dataset_handler import DEPRECATED_VIDORE_2_DATASETS_KEYWORDS, DEPRECATED_VIDORE_DATASETS_KEYWORDS, deprecated_get_datasets_nickname
 BLOCKLIST = ["impactframes"]
@@ -92,7 +96,9 @@ class DeprecatedModelHandler:
     # In order to keep only models relevant to a benchmark
     def filter_models_by_benchmark(self, benchmark_version=1):
         filtered_model_infos = {}
-        keywords = DEPRECATED_VIDORE_DATASETS_KEYWORDS if benchmark_version == 1 else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
         for model, info in self.model_infos.items():
             results = info["results"]
@@ -109,7 +115,11 @@ class DeprecatedModelHandler:
             for model in filtered_model_infos.keys():
                 res = filtered_model_infos[model]["results"]
                 dataset_res = {}
-                keywords = DEPRECATED_VIDORE_DATASETS_KEYWORDS if benchmark_version == 1 else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
                 for dataset in res.keys():
                     if not any(keyword in dataset for keyword in keywords):
                         continue

 import pandas as pd
 from huggingface_hub import HfApi, hf_hub_download, metadata_load
+from .dataset_handler import (
+    DEPRECATED_VIDORE_2_DATASETS_KEYWORDS,
+    DEPRECATED_VIDORE_DATASETS_KEYWORDS,
+    deprecated_get_datasets_nickname,
+)
 BLOCKLIST = ["impactframes"]
     # In order to keep only models relevant to a benchmark
     def filter_models_by_benchmark(self, benchmark_version=1):
         filtered_model_infos = {}
+        keywords = (
+            DEPRECATED_VIDORE_DATASETS_KEYWORDS if benchmark_version == 1 else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
+        )
         for model, info in self.model_infos.items():
             results = info["results"]
             for model in filtered_model_infos.keys():
                 res = filtered_model_infos[model]["results"]
                 dataset_res = {}
+                keywords = (
+                    DEPRECATED_VIDORE_DATASETS_KEYWORDS
+                    if benchmark_version == 1
+                    else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
+                )
                 for dataset in res.keys():
                     if not any(keyword in dataset for keyword in keywords):
                         continue

data/model_handler.py CHANGED Viewed

@@ -6,18 +6,14 @@ import pandas as pd
 from .dataset_handler import VIDORE_V1_MTEB_NAMES, VIDORE_V2_MTEB_NAMES, get_datasets_nickname
-class ModelHandler:
     def __init__(self):
         self.model_infos = {}
     @staticmethod
     def get_folders(dir_path):
-        return sorted([
-            path_
-            for path_ in os.listdir(dir_path)
-            if os.path.isdir(os.path.join(dir_path, path_))
-        ])
     def get_vidore_data(self, metric="ndcg_at_5"):
         repo_url = "https://github.com/embeddings-benchmark/results.git"
@@ -37,22 +33,36 @@ class ModelHandler:
             first_revision = revisions[0]
             result_filenames = [
                 result_filename
-                for result_filename in os.listdir(os.path.join(local_path, folder_of_interest, model_name, first_revision))
                 # if result_filename.endswith(".json") and result_filename != "model_meta.json"
             ]
             if "model_meta.json" in result_filenames:
-                with open(os.path.join(local_path, folder_of_interest, model_name, first_revision, "model_meta.json"), "r") as f:
                     meta = json.load(f)
             else:
                 meta = {}
             results = {}
             if all(f"{v1_dataset_name}.json" in result_filenames for v1_dataset_name in VIDORE_V1_MTEB_NAMES):
                 for v1_dataset_name in VIDORE_V1_MTEB_NAMES:
-                    with open(os.path.join(local_path, folder_of_interest, model_name, first_revision, f"{v1_dataset_name}.json"), "r") as f:
                         results[v1_dataset_name] = json.load(f)
             if all(f"{v2_dataset_name}.json" in result_filenames for v2_dataset_name in VIDORE_V2_MTEB_NAMES):
                 for v2_dataset_name in VIDORE_V2_MTEB_NAMES:
-                    with open(os.path.join(local_path, folder_of_interest, model_name, first_revision, f"{v2_dataset_name}.json"), "r") as f:
                         results[v2_dataset_name] = json.load(f)
                 if model_name not in self.model_infos:
                     self.model_infos[model_name] = {}
@@ -79,7 +89,9 @@ class ModelHandler:
                 keywords = VIDORE_V1_MTEB_NAMES if benchmark_version == 1 else VIDORE_V2_MTEB_NAMES
                 if "n_parameters" in filtered_model_infos[model]["meta"]:
                     try:
-                        dataset_res["Model Size (Million Parameters)"] = filtered_model_infos[model]["meta"]["n_parameters"] // 1_000_000
                     except TypeError:
                         dataset_res["Model Size (Million Parameters)"] = -1
                 else:

 from .dataset_handler import VIDORE_V1_MTEB_NAMES, VIDORE_V2_MTEB_NAMES, get_datasets_nickname
+class ModelHandler:
     def __init__(self):
         self.model_infos = {}
     @staticmethod
     def get_folders(dir_path):
+        return sorted([path_ for path_ in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, path_))])
     def get_vidore_data(self, metric="ndcg_at_5"):
         repo_url = "https://github.com/embeddings-benchmark/results.git"
             first_revision = revisions[0]
             result_filenames = [
                 result_filename
+                for result_filename in os.listdir(
+                    os.path.join(local_path, folder_of_interest, model_name, first_revision)
+                )
                 # if result_filename.endswith(".json") and result_filename != "model_meta.json"
             ]
             if "model_meta.json" in result_filenames:
+                with open(
+                    os.path.join(local_path, folder_of_interest, model_name, first_revision, "model_meta.json"), "r"
+                ) as f:
                     meta = json.load(f)
             else:
                 meta = {}
             results = {}
             if all(f"{v1_dataset_name}.json" in result_filenames for v1_dataset_name in VIDORE_V1_MTEB_NAMES):
                 for v1_dataset_name in VIDORE_V1_MTEB_NAMES:
+                    with open(
+                        os.path.join(
+                            local_path, folder_of_interest, model_name, first_revision, f"{v1_dataset_name}.json"
+                        ),
+                        "r",
+                    ) as f:
                         results[v1_dataset_name] = json.load(f)
             if all(f"{v2_dataset_name}.json" in result_filenames for v2_dataset_name in VIDORE_V2_MTEB_NAMES):
                 for v2_dataset_name in VIDORE_V2_MTEB_NAMES:
+                    with open(
+                        os.path.join(
+                            local_path, folder_of_interest, model_name, first_revision, f"{v2_dataset_name}.json"
+                        ),
+                        "r",
+                    ) as f:
                         results[v2_dataset_name] = json.load(f)
                 if model_name not in self.model_infos:
                     self.model_infos[model_name] = {}
                 keywords = VIDORE_V1_MTEB_NAMES if benchmark_version == 1 else VIDORE_V2_MTEB_NAMES
                 if "n_parameters" in filtered_model_infos[model]["meta"]:
                     try:
+                        dataset_res["Model Size (Million Parameters)"] = (
+                            filtered_model_infos[model]["meta"]["n_parameters"] // 1_000_000
+                        )
                     except TypeError:
                         dataset_res["Model Size (Million Parameters)"] = -1
                 else:

results ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit a3903080f8067ae1b491dfafae34d4e40121bcbf