Created
May 6, 2024 17:00
-
-
Save AkashC-goML/cc059a7c201f3a465b559970d2bdcb0c to your computer and use it in GitHub Desktop.
Re-Ranking and RAG.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyPkGR1di8gMBLLk4Pyuhr8I", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"2223dc261ac84ed2ad14d037859f3ca5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_e334812855214917a63e6a76155a6c35", | |
"IPY_MODEL_29e76a510c8c46d0808ce49388c670ea", | |
"IPY_MODEL_998658520adc4b6aa48b153794ff0be2" | |
], | |
"layout": "IPY_MODEL_cc7ea3053cd04d659c2b5dc7373badfa" | |
} | |
}, | |
"e334812855214917a63e6a76155a6c35": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_351dfdd986214391839d13c213fce093", | |
"placeholder": "", | |
"style": "IPY_MODEL_d32bbb0aa04f4639a5d94dc7cea034c2", | |
"value": "tokenizer_config.json: 100%" | |
} | |
}, | |
"29e76a510c8c46d0808ce49388c670ea": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_0d105cb2d94743f19bbea2c3bcda2bc5", | |
"max": 48, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_7607e8c994b34a18a8981ef8a7ef8783", | |
"value": 48 | |
} | |
}, | |
"998658520adc4b6aa48b153794ff0be2": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_532aa34688e9423e94594d4967ef2022", | |
"placeholder": "", | |
"style": "IPY_MODEL_1857a6d5f40143e6941b27353af710a4", | |
"value": " 48.0/48.0 [00:00<00:00, 846B/s]" | |
} | |
}, | |
"cc7ea3053cd04d659c2b5dc7373badfa": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"351dfdd986214391839d13c213fce093": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"d32bbb0aa04f4639a5d94dc7cea034c2": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"0d105cb2d94743f19bbea2c3bcda2bc5": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"7607e8c994b34a18a8981ef8a7ef8783": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"532aa34688e9423e94594d4967ef2022": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"1857a6d5f40143e6941b27353af710a4": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"7ae13d0e2fa545aaa4d0cbc02a756f76": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_842ca0565e144b498183ca469afcaeb5", | |
"IPY_MODEL_ef65b485574d420c9d29ab713064df60", | |
"IPY_MODEL_81d7e39ffc8f4e238a16e1e051d6a70a" | |
], | |
"layout": "IPY_MODEL_687c5ad3bace41749062628140d6cef4" | |
} | |
}, | |
"842ca0565e144b498183ca469afcaeb5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_6bc98bc064bf46fabbc72425e6a04cde", | |
"placeholder": "", | |
"style": "IPY_MODEL_5bdb424674d5428a9718968f7ccb417f", | |
"value": "config.json: 100%" | |
} | |
}, | |
"ef65b485574d420c9d29ab713064df60": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_93a9a41588b140d59f423059745f0b74", | |
"max": 570, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_e60dbeba37df4f0db8c81f8413f61307", | |
"value": 570 | |
} | |
}, | |
"81d7e39ffc8f4e238a16e1e051d6a70a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_e990efa20a9e4f589d80b59fb294457e", | |
"placeholder": "", | |
"style": "IPY_MODEL_834352bbfa06497087ccc0fd282387c0", | |
"value": " 570/570 [00:00<00:00, 9.03kB/s]" | |
} | |
}, | |
"687c5ad3bace41749062628140d6cef4": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"6bc98bc064bf46fabbc72425e6a04cde": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"5bdb424674d5428a9718968f7ccb417f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"93a9a41588b140d59f423059745f0b74": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"e60dbeba37df4f0db8c81f8413f61307": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"e990efa20a9e4f589d80b59fb294457e": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"834352bbfa06497087ccc0fd282387c0": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"5d5f74c4c8144064b4608167be6bedcf": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_268be2da8a5a473d9cea08e3b9084b66", | |
"IPY_MODEL_0b011360bbae45a3a7c2d222b93af477", | |
"IPY_MODEL_3c9e08b0edda4dfda74df38cc3a7137b" | |
], | |
"layout": "IPY_MODEL_889fb571413443a4adc255bbde65f337" | |
} | |
}, | |
"268be2da8a5a473d9cea08e3b9084b66": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_f422ba05478b47b9b81f107c8591451e", | |
"placeholder": "", | |
"style": "IPY_MODEL_1ab691653c744ee7a38a4d9988bd7e78", | |
"value": "vocab.txt: 100%" | |
} | |
}, | |
"0b011360bbae45a3a7c2d222b93af477": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_085dca686edb4aa9b115833ecf3de5aa", | |
"max": 231508, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_40846a21498d4c6c80c2a1d61c1e3a5a", | |
"value": 231508 | |
} | |
}, | |
"3c9e08b0edda4dfda74df38cc3a7137b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_6b570e5bf0eb4897be55c0498644bcb5", | |
"placeholder": "", | |
"style": "IPY_MODEL_8cac7ad24f624936a5b7b663da0413dd", | |
"value": " 232k/232k [00:00<00:00, 3.28MB/s]" | |
} | |
}, | |
"889fb571413443a4adc255bbde65f337": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"f422ba05478b47b9b81f107c8591451e": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"1ab691653c744ee7a38a4d9988bd7e78": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"085dca686edb4aa9b115833ecf3de5aa": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"40846a21498d4c6c80c2a1d61c1e3a5a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"6b570e5bf0eb4897be55c0498644bcb5": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"8cac7ad24f624936a5b7b663da0413dd": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"37d1a03a632e4eb887e93535e54a02fa": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_d0c3dcd59c1d4a95ba1d517e57d4bc75", | |
"IPY_MODEL_7991b3f594ed49dfad4c3b099a9d0340", | |
"IPY_MODEL_46715f152a8342e78e4877c99a579008" | |
], | |
"layout": "IPY_MODEL_d81a1a65d90e4e1b9ef1822b58318a2f" | |
} | |
}, | |
"d0c3dcd59c1d4a95ba1d517e57d4bc75": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_f5421fc2d3cb43c3bfcb986c0aed330d", | |
"placeholder": "", | |
"style": "IPY_MODEL_cd9fc73c223344fb89b9b2b9fd8d7e40", | |
"value": "tokenizer.json: 100%" | |
} | |
}, | |
"7991b3f594ed49dfad4c3b099a9d0340": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_b1895ceb637241bfb039845ffa00cd9d", | |
"max": 466062, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_c9df6b93c24448b29bdb661a08ddc7a8", | |
"value": 466062 | |
} | |
}, | |
"46715f152a8342e78e4877c99a579008": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_b675dca9a4034c21b7a15b35de53489d", | |
"placeholder": "", | |
"style": "IPY_MODEL_f3b229ef0b1d4081844c3b3cda230278", | |
"value": " 466k/466k [00:00<00:00, 5.67MB/s]" | |
} | |
}, | |
"d81a1a65d90e4e1b9ef1822b58318a2f": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"f5421fc2d3cb43c3bfcb986c0aed330d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"cd9fc73c223344fb89b9b2b9fd8d7e40": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"b1895ceb637241bfb039845ffa00cd9d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"c9df6b93c24448b29bdb661a08ddc7a8": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"b675dca9a4034c21b7a15b35de53489d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"f3b229ef0b1d4081844c3b3cda230278": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"2c6dcabb67cb44dd9a404b95ca420e25": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_2a309261c8674b78b5fdf624f560ddd6", | |
"IPY_MODEL_c88b636452e24cc7b2b683778c64eca5", | |
"IPY_MODEL_d5b2849304fe4064a650930a77f24b63" | |
], | |
"layout": "IPY_MODEL_3a908ad25df0497d9d32bb93e9ac76b3" | |
} | |
}, | |
"2a309261c8674b78b5fdf624f560ddd6": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_fd415a26ea2743ffa7fcedb3da165cfc", | |
"placeholder": "", | |
"style": "IPY_MODEL_fd9a381814f34925ba39cffb24485695", | |
"value": "model.safetensors: 100%" | |
} | |
}, | |
"c88b636452e24cc7b2b683778c64eca5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_0df6c55dfaad4e448e6ed3b676c7f3a2", | |
"max": 440449768, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_6e0ac3e6f31e49a980815c8a691d6691", | |
"value": 440449768 | |
} | |
}, | |
"d5b2849304fe4064a650930a77f24b63": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_588fe7a3b69048cda86fc1855b94d2fe", | |
"placeholder": "", | |
"style": "IPY_MODEL_23d8d7ba8f5540678b25ec8e002a7b22", | |
"value": " 440M/440M [00:06<00:00, 96.5MB/s]" | |
} | |
}, | |
"3a908ad25df0497d9d32bb93e9ac76b3": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"fd415a26ea2743ffa7fcedb3da165cfc": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"fd9a381814f34925ba39cffb24485695": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"0df6c55dfaad4e448e6ed3b676c7f3a2": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"6e0ac3e6f31e49a980815c8a691d6691": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"588fe7a3b69048cda86fc1855b94d2fe": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"23d8d7ba8f5540678b25ec8e002a7b22": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/AkashC-goML/cc059a7c201f3a465b559970d2bdcb0c/re-ranking-and-rag.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Install the required libraries:" | |
], | |
"metadata": { | |
"id": "9CfJ7EOet--K" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "D-U2zRqmt0Bb", | |
"outputId": "dc3b1c2f-8894-4cb6-a842-c38137545949" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.40.1)\n", | |
"Collecting faiss-cpu\n", | |
" Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m34.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.14.0)\n", | |
"Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n", | |
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n", | |
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n", | |
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", | |
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.12.25)\n", | |
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", | |
"Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n", | |
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.3)\n", | |
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.2)\n", | |
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2023.6.0)\n", | |
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.11.0)\n", | |
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", | |
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.7)\n", | |
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n", | |
"Installing collected packages: faiss-cpu\n", | |
"Successfully installed faiss-cpu-1.8.0\n" | |
] | |
} | |
], | |
"source": [ | |
"pip install transformers faiss-cpu\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Install the required libraries:\n" | |
], | |
"metadata": { | |
"id": "ptNyqH3muE1c" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import torch\n", | |
"from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", | |
"import faiss\n", | |
"import numpy as np" | |
], | |
"metadata": { | |
"id": "ptPZOdafuIIk" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Load a pre-trained language model and tokenizer:" | |
], | |
"metadata": { | |
"id": "6uixBl_VuKwm" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", | |
"model = AutoModelForSequenceClassification.from_pretrained(\"bert-base-uncased\")\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 336, | |
"referenced_widgets": [ | |
"2223dc261ac84ed2ad14d037859f3ca5", | |
"e334812855214917a63e6a76155a6c35", | |
"29e76a510c8c46d0808ce49388c670ea", | |
"998658520adc4b6aa48b153794ff0be2", | |
"cc7ea3053cd04d659c2b5dc7373badfa", | |
"351dfdd986214391839d13c213fce093", | |
"d32bbb0aa04f4639a5d94dc7cea034c2", | |
"0d105cb2d94743f19bbea2c3bcda2bc5", | |
"7607e8c994b34a18a8981ef8a7ef8783", | |
"532aa34688e9423e94594d4967ef2022", | |
"1857a6d5f40143e6941b27353af710a4", | |
"7ae13d0e2fa545aaa4d0cbc02a756f76", | |
"842ca0565e144b498183ca469afcaeb5", | |
"ef65b485574d420c9d29ab713064df60", | |
"81d7e39ffc8f4e238a16e1e051d6a70a", | |
"687c5ad3bace41749062628140d6cef4", | |
"6bc98bc064bf46fabbc72425e6a04cde", | |
"5bdb424674d5428a9718968f7ccb417f", | |
"93a9a41588b140d59f423059745f0b74", | |
"e60dbeba37df4f0db8c81f8413f61307", | |
"e990efa20a9e4f589d80b59fb294457e", | |
"834352bbfa06497087ccc0fd282387c0", | |
"5d5f74c4c8144064b4608167be6bedcf", | |
"268be2da8a5a473d9cea08e3b9084b66", | |
"0b011360bbae45a3a7c2d222b93af477", | |
"3c9e08b0edda4dfda74df38cc3a7137b", | |
"889fb571413443a4adc255bbde65f337", | |
"f422ba05478b47b9b81f107c8591451e", | |
"1ab691653c744ee7a38a4d9988bd7e78", | |
"085dca686edb4aa9b115833ecf3de5aa", | |
"40846a21498d4c6c80c2a1d61c1e3a5a", | |
"6b570e5bf0eb4897be55c0498644bcb5", | |
"8cac7ad24f624936a5b7b663da0413dd", | |
"37d1a03a632e4eb887e93535e54a02fa", | |
"d0c3dcd59c1d4a95ba1d517e57d4bc75", | |
"7991b3f594ed49dfad4c3b099a9d0340", | |
"46715f152a8342e78e4877c99a579008", | |
"d81a1a65d90e4e1b9ef1822b58318a2f", | |
"f5421fc2d3cb43c3bfcb986c0aed330d", | |
"cd9fc73c223344fb89b9b2b9fd8d7e40", | |
"b1895ceb637241bfb039845ffa00cd9d", | |
"c9df6b93c24448b29bdb661a08ddc7a8", | |
"b675dca9a4034c21b7a15b35de53489d", | |
"f3b229ef0b1d4081844c3b3cda230278", | |
"2c6dcabb67cb44dd9a404b95ca420e25", | |
"2a309261c8674b78b5fdf624f560ddd6", | |
"c88b636452e24cc7b2b683778c64eca5", | |
"d5b2849304fe4064a650930a77f24b63", | |
"3a908ad25df0497d9d32bb93e9ac76b3", | |
"fd415a26ea2743ffa7fcedb3da165cfc", | |
"fd9a381814f34925ba39cffb24485695", | |
"0df6c55dfaad4e448e6ed3b676c7f3a2", | |
"6e0ac3e6f31e49a980815c8a691d6691", | |
"588fe7a3b69048cda86fc1855b94d2fe", | |
"23d8d7ba8f5540678b25ec8e002a7b22" | |
] | |
}, | |
"id": "HfBjG-0ZuPvi", | |
"outputId": "fdfa57db-5cb3-40ce-a139-179fb60889f0" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", | |
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n", | |
"To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", | |
"You will be able to reuse this secret in all of your notebooks.\n", | |
"Please note that authentication is recommended but still optional to access public models or datasets.\n", | |
" warnings.warn(\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "2223dc261ac84ed2ad14d037859f3ca5" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"config.json: 0%| | 0.00/570 [00:00<?, ?B/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "7ae13d0e2fa545aaa4d0cbc02a756f76" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "5d5f74c4c8144064b4608167be6bedcf" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "37d1a03a632e4eb887e93535e54a02fa" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"model.safetensors: 0%| | 0.00/440M [00:00<?, ?B/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "2c6dcabb67cb44dd9a404b95ca420e25" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", | |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Create a function to encode text using the language model:" | |
], | |
"metadata": { | |
"id": "2nTwEIeLuVBE" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"def encode_text(text, tokenizer, model):\n", | |
" inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True, max_length=512)\n", | |
" with torch.no_grad():\n", | |
" outputs = model(**inputs)\n", | |
" return outputs.logits.mean(dim=1).cpu().numpy()\n" | |
], | |
"metadata": { | |
"id": "UXLQH5zeuTB0" | |
}, | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Prepare a dataset of question-answer pairs and encode them:" | |
], | |
"metadata": { | |
"id": "ptKZt4ixuZBM" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Replace this with your actual dataset\n", | |
"qas = [\n", | |
" (\"What is the capital of France?\", \"Paris\"),\n", | |
" # Add more question-answer pairs here\n", | |
"]\n", | |
"\n", | |
"# Encode question-answer pairs\n", | |
"encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in qas]\n" | |
], | |
"metadata": { | |
"id": "Xk3CPu-auX3y" | |
}, | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Build a FAISS index for efficient similarity search:" | |
], | |
"metadata": { | |
"id": "cPW9fezDufK1" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"dimension = encoded_qas[0].shape[0]\n", | |
"index = faiss.IndexFlatL2(dimension)\n", | |
"index.add(np.array(encoded_qas))\n" | |
], | |
"metadata": { | |
"id": "6_vk8Bi2ugtV" | |
}, | |
"execution_count": 13, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Implement a function to retrieve the top-k most similar question-answer pairs:" | |
], | |
"metadata": { | |
"id": "BroJVAwJupye" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"def retrieve_top_k(query, index, k):\n", | |
" encoded_query = encode_text(query, tokenizer, model)\n", | |
" distances, indices = index.search(np.array([encoded_query]), k)\n", | |
" return [(qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n" | |
], | |
"metadata": { | |
"id": "k1bEwKjCurvd" | |
}, | |
"execution_count": 14, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Implement a re-ranking function using another language model or a custom scoring function:" | |
], | |
"metadata": { | |
"id": "GTK7Uo02uuT9" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"def re_rank(candidates, query):\n", | |
" # You can implement your re-ranking logic here, e.g., using an ensemble model or custom scoring\n", | |
" # For simplicity, we'll just sort the candidates by their original distances\n", | |
" return sorted(candidates, key=lambda x: x[1])\n" | |
], | |
"metadata": { | |
"id": "p14aFDYCuxdS" | |
}, | |
"execution_count": 15, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Test the retrieval and re-ranking:" | |
], | |
"metadata": { | |
"id": "TmFnZ1lHvIam" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"query = \"Where is the capital of France?\"\n", | |
"top_k = 5\n", | |
"\n", | |
"# Retrieve top-k candidates\n", | |
"candidates = retrieve_top_k(query, index, top_k)\n", | |
"\n", | |
"# Re-rank the candidates\n", | |
"re_ranked_candidates = re_rank(candidates, query)\n", | |
"\n", | |
"# Print the re-ranked results\n", | |
"for qa, distance in re_ranked_candidates:\n", | |
" print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "tToEEPQ9vK9T", | |
"outputId": "aee62066-ff9d-4e01-d618-53a785c84697" | |
}, | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Question: What is the capital of France?\n", | |
"Answer: Paris\n", | |
"Distance: 0.0031302475836127996\n", | |
"\n", | |
"Question: What is the capital of France?\n", | |
"Answer: Paris\n", | |
"Distance: 3.4028234663852886e+38\n", | |
"\n", | |
"Question: What is the capital of France?\n", | |
"Answer: Paris\n", | |
"Distance: 3.4028234663852886e+38\n", | |
"\n", | |
"Question: What is the capital of France?\n", | |
"Answer: Paris\n", | |
"Distance: 3.4028234663852886e+38\n", | |
"\n", | |
"Question: What is the capital of France?\n", | |
"Answer: Paris\n", | |
"Distance: 3.4028234663852886e+38\n", | |
"\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# IPL dataset of question-answer pairs\n", | |
"ipl_qas = [\n", | |
" (\"Which team won the first IPL season?\", \"Rajasthan Royals\"),\n", | |
" (\"Who scored the highest individual score in IPL?\", \"Chris Gayle (175*)\"),\n", | |
" (\"Who is the leading run-scorer in IPL history?\", \"Virat Kohli\"),\n", | |
" (\"Who is the leading wicket-taker in IPL history?\", \"Lasith Malinga\"),\n", | |
" (\"Which team has won the most IPL titles?\", \"Mumbai Indians\")\n", | |
"]\n", | |
"\n", | |
"# Encode only the question text of each pair; answers are looked up by index, not embedded\n", | |
"encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in ipl_qas]\n" | |
], | |
"metadata": { | |
"id": "fkUmgvaexFQS" | |
}, | |
"execution_count": 11, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Build a FAISS index for efficient similarity search\n", | |
"dimension = encoded_qas[0].shape[0]\n", | |
"index = faiss.IndexFlatL2(dimension)\n", | |
"index.add(np.array(encoded_qas))\n", | |
"\n", | |
"# Implement a function to retrieve the top-k most similar question-answer pairs\n", | |
"def retrieve_top_k(query, index, k):\n", | |
" encoded_query = encode_text(query, tokenizer, model)\n", | |
" distances, indices = index.search(np.array([encoded_query]), k)\n", | |
" return [(ipl_qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n", | |
"\n", | |
"# Implement a re-ranking function using another language model or a custom scoring function\n", | |
"def re_rank(candidates, query):\n", | |
" # Placeholder re-ranker: plug in real re-ranking logic here, e.g., an ensemble model or a custom scoring function.\n", | |
" # For now `query` is unused and the candidates are simply sorted by their original retrieval distance (ascending).\n", | |
" return sorted(candidates, key=lambda x: x[1])\n", | |
"\n", | |
"# Test the retrieval and re-ranking with a sample query\n", | |
"query = \"who is the caption of csk \"\n", | |
"top_k = 3\n", | |
"\n", | |
"# Retrieve top-k candidates\n", | |
"candidates = retrieve_top_k(query, index, top_k)\n", | |
"\n", | |
"# Re-rank the candidates\n", | |
"re_ranked_candidates = re_rank(candidates, query)\n", | |
"\n", | |
"# Print the re-ranked results\n", | |
"for qa, distance in re_ranked_candidates:\n", | |
" print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")\n", | |
"\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "zOh-ThfSvP_P", | |
"outputId": "6931c2cf-9e7f-4b7d-b666-6490a7867b07" | |
}, | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Question: Who is the leading wicket-taker in IPL history?\n", | |
"Answer: Lasith Malinga\n", | |
"Distance: 0.02710317261517048\n", | |
"\n", | |
"Question: Who scored the highest individual score in IPL?\n", | |
"Answer: Chris Gayle (175*)\n", | |
"Distance: 0.02815784327685833\n", | |
"\n", | |
"Question: Which team has won the most IPL titles?\n", | |
"Answer: Mumbai Indians\n", | |
"Distance: 0.03018650971353054\n", | |
"\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"! pip install wikipedia~=1.4.0" | |
], | |
"metadata": { | |
"id": "kg0IcRDXy6nv" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install langchain~=0.1.16" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "eq934O_yy4w5", | |
"outputId": "b1d4abc1-f876-4517-ec5c-2d973a8db9da" | |
}, | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Collecting langchain~=0.1.16\n", | |
" Downloading langchain-0.1.17-py3-none-any.whl (867 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m867.6/867.6 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (6.0.1)\n", | |
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.0.29)\n", | |
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (3.9.5)\n", | |
"Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (4.0.3)\n", | |
"Collecting dataclasses-json<0.7,>=0.5.7 (from langchain~=0.1.16)\n", | |
" Downloading dataclasses_json-0.6.5-py3-none-any.whl (28 kB)\n", | |
"Collecting jsonpatch<2.0,>=1.33 (from langchain~=0.1.16)\n", | |
" Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", | |
"Collecting langchain-community<0.1,>=0.0.36 (from langchain~=0.1.16)\n", | |
" Downloading langchain_community-0.0.36-py3-none-any.whl (2.0 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting langchain-core<0.2.0,>=0.1.48 (from langchain~=0.1.16)\n", | |
" Downloading langchain_core-0.1.50-py3-none-any.whl (302 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.8/302.8 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain~=0.1.16)\n", | |
" Downloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)\n", | |
"Collecting langsmith<0.2.0,>=0.1.17 (from langchain~=0.1.16)\n", | |
" Downloading langsmith-0.1.54-py3-none-any.whl (116 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.7/116.7 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (1.25.2)\n", | |
"Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.7.1)\n", | |
"Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.31.0)\n", | |
"Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (8.2.3)\n", | |
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.3.1)\n", | |
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (23.2.0)\n", | |
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.4.1)\n", | |
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (6.0.5)\n", | |
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.9.4)\n", | |
"Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n", | |
" Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n", | |
" Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", | |
"Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain~=0.1.16)\n", | |
" Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n", | |
"Collecting packaging<24.0,>=23.2 (from langchain-core<0.2.0,>=0.1.48->langchain~=0.1.16)\n", | |
" Downloading packaging-23.2-py3-none-any.whl (53 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain~=0.1.16)\n", | |
" Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (0.6.0)\n", | |
"Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (2.18.2)\n", | |
"Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (4.11.0)\n", | |
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (3.3.2)\n", | |
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (3.7)\n", | |
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (2.0.7)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (2024.2.2)\n", | |
"Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain~=0.1.16) (3.0.3)\n", | |
"Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n", | |
" Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", | |
"Installing collected packages: packaging, orjson, mypy-extensions, jsonpointer, typing-inspect, marshmallow, jsonpatch, langsmith, dataclasses-json, langchain-core, langchain-text-splitters, langchain-community, langchain\n", | |
" Attempting uninstall: packaging\n", | |
" Found existing installation: packaging 24.0\n", | |
" Uninstalling packaging-24.0:\n", | |
" Successfully uninstalled packaging-24.0\n", | |
"Successfully installed dataclasses-json-0.6.5 jsonpatch-1.33 jsonpointer-2.4 langchain-0.1.17 langchain-community-0.0.36 langchain-core-0.1.50 langchain-text-splitters-0.0.1 langsmith-0.1.54 marshmallow-3.21.2 mypy-extensions-1.0.0 orjson-3.10.3 packaging-23.2 typing-inspect-0.9.0\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"\n", | |
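"# NOTE: WikipediaLoader must already be imported for this cell to run; uncomment the line below if it is not.\n", | |
"# from langchain_community.document_loaders import WikipediaLoader\n", | |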
"\n", | |
"pages = WikipediaLoader(query=\"Harry Potter\", load_max_docs=3, lang=\"en\").load()\n", | |
"\n", | |
"import nltk\n", | |
"from nltk.corpus import stopwords\n", | |
"from nltk.tokenize import sent_tokenize, word_tokenize\n", | |
"\n", | |
"nltk.download(\"punkt\")\n", | |
"nltk.download(\"stopwords\")\n", | |
"\n", | |
"# Load Wikipedia pages\n", | |
"pages = WikipediaLoader(query=\"Harry Potter\", load_max_docs=3, lang=\"en\").load()\n", | |
"\n", | |
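"# Preprocess the data: each sentence is stored as the answer, and its alphanumeric,\n", | |
"# non-stop-word tokens are joined to form the pseudo-question used for retrieval\n", | |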
"stop_words = set(stopwords.words(\"english\"))\n", | |
"wiki_qas = []\n", | |
"\n", | |
"for page in pages:\n", | |
" sentences = sent_tokenize(page.page_content)\n", | |
" for sentence in sentences:\n", | |
" words = word_tokenize(sentence)\n", | |
" filtered_words = [word for word in words if word.lower() not in stop_words and word.isalnum()]\n", | |
" question = \" \".join(filtered_words)\n", | |
" answer = sentence\n", | |
" if answer.strip(): # Check if the answer is not empty\n", | |
" wiki_qas.append((question, answer))\n", | |
"\n", | |
"# Encode only the pseudo-question of each pair; the answer sentence is looked up by index\n", | |
"encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in wiki_qas]\n", | |
"\n", | |
"encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in wiki_qas]\n", | |
"\n", | |
"# Build a FAISS index for efficient similarity search\n", | |
"dimension = encoded_qas[0].shape[0]\n", | |
"index = faiss.IndexFlatL2(dimension)\n", | |
"index.add(np.array(encoded_qas))\n", | |
"\n", | |
"# NOTE: the earlier retrieve_top_k indexes ipl_qas, so it is redefined in the next cell to use wiki_qas;\n", | |
"# re_rank is redefined below with an extra num_candidates parameter\n", | |
"\n", | |
"# Test the retrieval and re-ranking with a sample query\n", | |
"query = \"Who is Harry Potter?\"\n", | |
"top_k = 3\n", | |
"\n", | |
"def re_rank(candidates, query, num_candidates):\n", | |
" # Placeholder re-ranker: plug in real re-ranking logic here, e.g., an ensemble model or a custom scoring function.\n", | |
" # For now `query` is unused: keep the first num_candidates candidates and sort them by their original distance (ascending).\n", | |
" return sorted(candidates[:num_candidates], key=lambda x: x[1])\n", | |
"\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "mbMysjGWyLpJ", | |
"outputId": "e7d942ed-d891-4a12-9d79-9543ffc87cfa" | |
}, | |
"execution_count": 27, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package punkt to /root/nltk_data...\n", | |
"[nltk_data] Package punkt is already up-to-date!\n", | |
"[nltk_data] Downloading package stopwords to /root/nltk_data...\n", | |
"[nltk_data] Package stopwords is already up-to-date!\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"def retrieve_top_k(query, index, k):\n", | |
" encoded_query = encode_text(query, tokenizer, model)\n", | |
" distances, indices = index.search(np.array([encoded_query]), k)\n", | |
" return [(wiki_qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n" | |
], | |
"metadata": { | |
"id": "3sm9-aFA09Ts" | |
}, | |
"execution_count": 29, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"candidates = retrieve_top_k(query, index, len(encoded_qas))\n", | |
"\n", | |
"# Re-rank the candidates\n", | |
"num_candidates = min(top_k, len(candidates))\n", | |
"re_ranked_candidates = re_rank(candidates, query, num_candidates)\n", | |
"\n", | |
"# Print the re-ranked results\n", | |
"for qa, distance in re_ranked_candidates:\n", | |
" print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "bL4aAqNy0g6D", | |
"outputId": "ff925a1d-a699-4652-91d6-1ae8ea6f42e0" | |
}, | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Question: series mainly produced David Heyman stars Daniel Radcliffe Rupert Grint Emma Watson three leading characters Harry Potter Ron Weasley Hermione Granger\n", | |
"Answer: The series was mainly produced by David Heyman, and stars Daniel Radcliffe, Rupert Grint, and Emma Watson as the three leading characters: Harry Potter, Ron Weasley, and Hermione Granger.\n", | |
"Distance: 7.031151199043961e-06\n", | |
"\n", | |
"Question: first book Harry Potter Philosopher Stone Harry lives cupboard stairs house Dursleys aunt uncle cousin treat poorly\n", | |
"Answer: In the first book, Harry Potter and the Philosopher's Stone, Harry lives in a cupboard under the stairs in the house of the Dursleys, his aunt, uncle and cousin, who all treat him poorly.\n", | |
"Distance: 3.793924770434387e-05\n", | |
"\n", | |
"Question: Harry Potter series used source object lessons educational techniques sociological analysis marketing\n", | |
"Answer: The Harry Potter series has been used as a source of object lessons in educational techniques, sociological analysis and marketing.\n", | |
"Distance: 0.00018532716785557568\n", | |
"\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [], | |
"metadata": { | |
"id": "VqenpHe3yK4a" | |
} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment