Created
August 24, 2021 10:03
-
-
Save avidale/cacf235aebeaaf4c578389e1146c3c57 to your computer and use it in GitHub Desktop.
Bert-NER-ru
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Bert-NER-ru", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.9" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"38effe6811e0445ea6a06fbf62322cb2": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_9c3e627f3f214708a0eada8855e345e1", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_4b4eaf184a9a482798ac08ff5febafb9", | |
"IPY_MODEL_c3566754043543218423794f629214d6", | |
"IPY_MODEL_b92ff60477c043e0b2bdaea88a70295b" | |
] | |
} | |
}, | |
"9c3e627f3f214708a0eada8855e345e1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"4b4eaf184a9a482798ac08ff5febafb9": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_5e07fb58967a4d03931682646417f324", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "Downloading: 100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_c8847e2fd0654eabb62848c927685ba4" | |
} | |
}, | |
"c3566754043543218423794f629214d6": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_bb59a31387e84bc18937ca0aafb36230", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 341, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 341, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_7acb711b2f994c0c8c292f7735ccfd15" | |
} | |
}, | |
"b92ff60477c043e0b2bdaea88a70295b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_93c12e9ea0294f70b4dca310239b55e4", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 341/341 [00:00<00:00, 6.59kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_44595961fd42457496803732f50e813b" | |
} | |
}, | |
"5e07fb58967a4d03931682646417f324": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"c8847e2fd0654eabb62848c927685ba4": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"bb59a31387e84bc18937ca0aafb36230": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"7acb711b2f994c0c8c292f7735ccfd15": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"93c12e9ea0294f70b4dca310239b55e4": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"44595961fd42457496803732f50e813b": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"97db5fbd0b134002a15f1e89b2e2a871": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_4ac3dbe72f9b4dd2b27d91630736fbcc", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_937d10a0f080492ab64efbb53f6ebe83", | |
"IPY_MODEL_3dab395a7da545838fb6c54fc19420cc", | |
"IPY_MODEL_712ed73184ec4551a6891862b3295b22" | |
] | |
} | |
}, | |
"4ac3dbe72f9b4dd2b27d91630736fbcc": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"937d10a0f080492ab64efbb53f6ebe83": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_5281ff75b7144218a6218c65d8789f48", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "Downloading: 100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_bedec2ca0adc4239b35f94a23a9b3cdd" | |
} | |
}, | |
"3dab395a7da545838fb6c54fc19420cc": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_8263aecd8ead4ceabf6112e536ac6dbd", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 632, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 632, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_e7f42df803cc4ed6ae43ead43e283ed8" | |
} | |
}, | |
"712ed73184ec4551a6891862b3295b22": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_57770139538d40c38c68112f622993f3", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 632/632 [00:00<00:00, 20.9kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_c3caeafc16f44882938d7c1dcb56cb18" | |
} | |
}, | |
"5281ff75b7144218a6218c65d8789f48": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"bedec2ca0adc4239b35f94a23a9b3cdd": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"8263aecd8ead4ceabf6112e536ac6dbd": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"e7f42df803cc4ed6ae43ead43e283ed8": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"57770139538d40c38c68112f622993f3": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"c3caeafc16f44882938d7c1dcb56cb18": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"0389bbf4dde74c4db0117a6f66c8808f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_bb94580fba5140078f7a4ac289308e6f", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_ff1d724506ef4ab1a73c4a6635a95cdb", | |
"IPY_MODEL_6568a4b7ebba4b66ae324cfb1e4c6c56", | |
"IPY_MODEL_37f3d4726084402ba1ff26773197b415" | |
] | |
} | |
}, | |
"bb94580fba5140078f7a4ac289308e6f": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"ff1d724506ef4ab1a73c4a6635a95cdb": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_6f02d9d6f80249e4949123b465d56584", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "Downloading: 100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_f5e2e39b8c344f81b40f43c7645b6fae" | |
} | |
}, | |
"6568a4b7ebba4b66ae324cfb1e4c6c56": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_2f8e9faf6afe4b278a4889a13e29fd68", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 241082, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 241082, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_ff12a26f63e643fbb301dffe15f375cf" | |
} | |
}, | |
"37f3d4726084402ba1ff26773197b415": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_2fd0ded25025401d917bc2624b23a5ce", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 241k/241k [00:00<00:00, 3.11MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_fd461fa9ee1249dbb49e10bf7201f9d9" | |
} | |
}, | |
"6f02d9d6f80249e4949123b465d56584": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"f5e2e39b8c344f81b40f43c7645b6fae": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"2f8e9faf6afe4b278a4889a13e29fd68": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"ff12a26f63e643fbb301dffe15f375cf": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"2fd0ded25025401d917bc2624b23a5ce": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"fd461fa9ee1249dbb49e10bf7201f9d9": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"91e9fdb905864efa8d42ee7cb3680e08": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_d8aa99a5f7cb42b9b3ca28edb0b0a007", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_0f4c5d11ce814b83bb85389b3c5a4c5f", | |
"IPY_MODEL_62ae82032a8a403b989ee8e0ab06f58a", | |
"IPY_MODEL_89ccf6c81454461c9a94ee0b9820d4a9" | |
] | |
} | |
}, | |
"d8aa99a5f7cb42b9b3ca28edb0b0a007": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"0f4c5d11ce814b83bb85389b3c5a4c5f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_c76403b23ddd4fd5a644e63e21da8358", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "Downloading: 100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_303a6382c4ff45439a40c47e969306da" | |
} | |
}, | |
"62ae82032a8a403b989ee8e0ab06f58a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_24bff1add1804f5ea46b5edaf4ca7428", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 468145, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 468145, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_6e2ff1b9214049188ac0587fb33b5352" | |
} | |
}, | |
"89ccf6c81454461c9a94ee0b9820d4a9": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_5f25f5e5ad604408bcb53d0b70f67f20", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 468k/468k [00:00<00:00, 5.63MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_76260400334d4c4f8985e69b4800ae28" | |
} | |
}, | |
"c76403b23ddd4fd5a644e63e21da8358": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"303a6382c4ff45439a40c47e969306da": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"24bff1add1804f5ea46b5edaf4ca7428": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"6e2ff1b9214049188ac0587fb33b5352": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"5f25f5e5ad604408bcb53d0b70f67f20": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"76260400334d4c4f8985e69b4800ae28": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"21fdf7eb6fb94da0bc9639b3f4ea7f00": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_20b1444148dc40f89b107e30dbcf6c0b", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_36dc0d7a19c04a47a06618e187ee894a", | |
"IPY_MODEL_791e009541a64d749330e6123ca7d87f", | |
"IPY_MODEL_06c63accbd2a4903b762ed21545bfbbe" | |
] | |
} | |
}, | |
"20b1444148dc40f89b107e30dbcf6c0b": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"36dc0d7a19c04a47a06618e187ee894a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_07fee35962004c8996c8acef923292eb", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "Downloading: 100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_b3332e5f66ad4c6c830f28bc290cd4bd" | |
} | |
}, | |
"791e009541a64d749330e6123ca7d87f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_b831ba8c276b4a1bb0ef7ae16a7a8fc9", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 112, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 112, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_7eff20962dfe422b9523c4e74f5372aa" | |
} | |
}, | |
"06c63accbd2a4903b762ed21545bfbbe": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_663c10e13a0e47f7b115ad50bd5b3965", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 112/112 [00:00<00:00, 3.03kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_997504803ac5445588c07cf97049d14a" | |
} | |
}, | |
"07fee35962004c8996c8acef923292eb": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"b3332e5f66ad4c6c830f28bc290cd4bd": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"b831ba8c276b4a1bb0ef7ae16a7a8fc9": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"7eff20962dfe422b9523c4e74f5372aa": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"663c10e13a0e47f7b115ad50bd5b3965": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"997504803ac5445588c07cf97049d14a": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"e16827a6f03a4b92889daf18d9917126": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_7b3185264cfe469683ad9cc81b0a8484", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_20ea5f6227b041969b1ce0d686a39121", | |
"IPY_MODEL_78818c7330fd489c8820d845afab2fca", | |
"IPY_MODEL_19c53fdf63a0408e8ad85e56a94d7dcd" | |
] | |
} | |
}, | |
"7b3185264cfe469683ad9cc81b0a8484": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"20ea5f6227b041969b1ce0d686a39121": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_43d9892c92f343369d714477c706d45a", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_91f00c3eed1b41d288a53cf829f88555" | |
} | |
}, | |
"78818c7330fd489c8820d845afab2fca": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_f2f2a65d8c5d4627855526eccf8c68d7", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 4, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 4, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_2c4a9988fa90474ba9aa1f48bf03704a" | |
} | |
}, | |
"19c53fdf63a0408e8ad85e56a94d7dcd": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_ec76c271eccd45c7b8d28a15274b1d50", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 4/4 [00:00<00:00, 6.93ba/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_eefb8e1e66ff486e92c0ce618c12be66" | |
} | |
}, | |
"43d9892c92f343369d714477c706d45a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"91f00c3eed1b41d288a53cf829f88555": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"f2f2a65d8c5d4627855526eccf8c68d7": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"2c4a9988fa90474ba9aa1f48bf03704a": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"ec76c271eccd45c7b8d28a15274b1d50": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"eefb8e1e66ff486e92c0ce618c12be66": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"b27b3845581c4dcba258672ecde20982": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_16091398feea4bfb8e5c07a679934a3e", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_b6f776fb81874175a00f9d4569edf89c", | |
"IPY_MODEL_9ae961550c684fbda20bbec6043ca80e", | |
"IPY_MODEL_54e1fd908fed41c981e8bb39068da20c" | |
] | |
} | |
}, | |
"16091398feea4bfb8e5c07a679934a3e": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"b6f776fb81874175a00f9d4569edf89c": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_c46c29c17e5d41128bb8bc401c5c4c8c", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_d0470494b6be4bf0af4fc7007e285149" | |
} | |
}, | |
"9ae961550c684fbda20bbec6043ca80e": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_9c406cfcfcf9424181939d2f6009090a", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 1, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 1, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_d2e8a291b9c54a45ba9f812ec6d19fcc" | |
} | |
}, | |
"54e1fd908fed41c981e8bb39068da20c": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_6fb72f595fb14608ae7e6a20c2e410a9", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 1/1 [00:00<00:00, 5.44ba/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_e174a5cbd06441329ccd8cb547b44503" | |
} | |
}, | |
"c46c29c17e5d41128bb8bc401c5c4c8c": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"d0470494b6be4bf0af4fc7007e285149": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"9c406cfcfcf9424181939d2f6009090a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"d2e8a291b9c54a45ba9f812ec6d19fcc": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"6fb72f595fb14608ae7e6a20c2e410a9": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"e174a5cbd06441329ccd8cb547b44503": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"178b2e70a03141c3a8d14c03b1024d34": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_a2a75ec023b64fb0813a0d3b9da549a3", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_6446a516afdf4406a5a8c876aa2a0179", | |
"IPY_MODEL_787dde6c71aa4303a3f1ae908bdf3288", | |
"IPY_MODEL_04e795570e2245e2844e7acfd36611e4" | |
] | |
} | |
}, | |
"a2a75ec023b64fb0813a0d3b9da549a3": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"6446a516afdf4406a5a8c876aa2a0179": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_a0441eab19d441d7ae60ea926a1ddabe", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "Downloading: 100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_00a1bad590e34fc09067414ed4bae1d9" | |
} | |
}, | |
"787dde6c71aa4303a3f1ae908bdf3288": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_5b3223766ec44a0781e88c6f0d276c12", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 47679974, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 47679974, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_085cbbc409874d57b38930b6b05ecfd9" | |
} | |
}, | |
"04e795570e2245e2844e7acfd36611e4": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_1a255ac062e94624a2ecfb4f58889d74", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 47.7M/47.7M [00:01<00:00, 47.3MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_1d51207792fd4afb849e2ae72ddd68ce" | |
} | |
}, | |
"a0441eab19d441d7ae60ea926a1ddabe": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"00a1bad590e34fc09067414ed4bae1d9": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"5b3223766ec44a0781e88c6f0d276c12": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"085cbbc409874d57b38930b6b05ecfd9": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"1a255ac062e94624a2ecfb4f58889d74": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"1d51207792fd4afb849e2ae72ddd68ce": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"bba28fea430d436981b0bfab06fb4ee6": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_fe16833673ba4dc79001d9b3d28eb6d2", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_513e9fa4a8a04f889d68ac7658818465", | |
"IPY_MODEL_b38e9ab8c6fa4ea1952a2265dcdcfff5", | |
"IPY_MODEL_91c93a7307854ac98d9bdf6746286517" | |
] | |
} | |
}, | |
"fe16833673ba4dc79001d9b3d28eb6d2": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"513e9fa4a8a04f889d68ac7658818465": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_c291de1c9ba343ff8e140a8c7ef1496a", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "Downloading: ", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_b1e72abd13c84f08b763eae36928fb4e" | |
} | |
}, | |
"b38e9ab8c6fa4ea1952a2265dcdcfff5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_251fc9cb4f574aa4881b722cef11324a", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 2482, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 2482, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_71c5d3acf8674d77b38d6fa6b0aba8d5" | |
} | |
}, | |
"91c93a7307854ac98d9bdf6746286517": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_94419c78831745928f8e5327f53ef5e0", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 6.34k/? [00:00<00:00, 157kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_31a5bb08896f47309e658dac697b6680" | |
} | |
}, | |
"c291de1c9ba343ff8e140a8c7ef1496a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"b1e72abd13c84f08b763eae36928fb4e": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"251fc9cb4f574aa4881b722cef11324a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"71c5d3acf8674d77b38d6fa6b0aba8d5": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"94419c78831745928f8e5327f53ef5e0": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"31a5bb08896f47309e658dac697b6680": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
}, | |
"accelerator": "GPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/avidale/cacf235aebeaaf4c578389e1146c3c57/bert-ner-ru.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "X4cRE8IbIrIV" | |
}, | |
"source": [ | |
"Основано на блокноте https://github.com/huggingface/notebooks/blob/master/examples/token_classification.ipynb" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MOsHUjgdIrIW" | |
}, | |
"source": [ | |
"! pip install datasets transformers seqeval" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "4HL1yaESsJA9" | |
}, | |
"source": [ | |
"В этом блокноте мы дообучаем модель на задаче классификации отдельных слов, а именно, распознавание именованных сущностей (aka named entity recognition, aka NER). Мы возьмём датасет медицинских сущностей, но в целом пайплайн подходит для любой задачи на выделение сущностей в тексте. \n", | |
"\n", | |
"Для скорости мы возьмём маленький BERT для русского языка [rubert-tiny](https://huggingface.co/cointegrated/rubert-tiny); если взять другую, более крупную BERT-подобную модель, качество NER может быть выше, но и время обучения и работы будет дольше \n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "4RRkXuteIrIh" | |
}, | |
"source": [ | |
"This notebook is built to run on any token classification task, with any model checkpoint from the [Model Hub](https://huggingface.co/models) as long as that model has a version with a token classification head and a fast tokenizer (check on [this table](https://huggingface.co/transformers/index.html#bigtable) if this is the case). It might just need some small adjustments if you decide to use a different dataset than the one used here. Depending on you model and the GPU you are using, you might need to adjust the batch size to avoid out-of-memory errors. Set those three parameters, then the rest of the notebook should run smoothly:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "zVvslsfMIrIh" | |
}, | |
"source": [ | |
"model_checkpoint = \"cointegrated/rubert-tiny\"\n", | |
"batch_size = 16" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "whPRbBNbIrIl" | |
}, | |
"source": [ | |
"## Loading the dataset" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "J8mt63rWvkv3" | |
}, | |
"source": [ | |
"Для обучения мы возьмём [Russian Drug Reaction Corpus](https://github.com/cimm-kzn/RuDReC): размеченный корпус русскоязычных отзывов на лекарства. \n", | |
"\n", | |
"Загрузим мы его библиотекой corus, потому что это удобно " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IreSlFmlIrIm" | |
}, | |
"source": [ | |
"from datasets import load_dataset, load_metric" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "s_AY1ATSIrIq" | |
}, | |
"source": [ | |
"!wget https://github.com/cimm-kzn/RuDReC/raw/master/data/rudrec_annotated.json\n", | |
"!pip install corus razdel" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "VALH-KBTMfVI", | |
"outputId": "385f3c15-6aa7-4c9a-bd02-8db5c3a593fb" | |
}, | |
"source": [ | |
"from corus import load_rudrec\n", | |
"drugs = list(load_rudrec('rudrec_annotated.json'))\n", | |
"print(len(drugs))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"4809\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "fGBywJmAv2NN" | |
}, | |
"source": [ | |
"Пример документа:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ynPlkV5gv4XC", | |
"outputId": "e3fe1c20-6f9d-4921-d56b-71d810de8143" | |
}, | |
"source": [ | |
"drugs[0]" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"RuDReCRecord(\n", | |
" file_name='172744.tsv',\n", | |
" text='нам прописали, так мой ребенок сыпью покрылся, глаза опухли, сверху и снизу на веках высыпала сыпь, ( 8 месяцев сыну)А от виферона такого не было... У кого ещё такие побочки, отзовитесь!1 Чем спасались?\\n',\n", | |
" sentence_id=0,\n", | |
" entities=[RuDReCEntity(\n", | |
" entity_id='*[0]_se',\n", | |
" entity_text='виферона',\n", | |
" entity_type='Drugform',\n", | |
" start=122,\n", | |
" end=130,\n", | |
" concept_id='C0021735',\n", | |
" concept_name=nan\n", | |
" ), RuDReCEntity(\n", | |
" entity_id='*[1]',\n", | |
" entity_text='сыпью покрылся',\n", | |
" entity_type='ADR',\n", | |
" start=31,\n", | |
" end=45,\n", | |
" concept_id='C0015230',\n", | |
" concept_name=nan\n", | |
" ), RuDReCEntity(\n", | |
" entity_id='*[2]',\n", | |
" entity_text='глаза опухли',\n", | |
" entity_type='ADR',\n", | |
" start=47,\n", | |
" end=59,\n", | |
" concept_id='C4760994',\n", | |
" concept_name=nan\n", | |
" ), RuDReCEntity(\n", | |
" entity_id='*[3]',\n", | |
" entity_text='на веках высыпала сыпь',\n", | |
" entity_type='ADR',\n", | |
" start=76,\n", | |
" end=98,\n", | |
" concept_id='C0015230',\n", | |
" concept_name=nan\n", | |
" )]\n", | |
")" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 6 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "iSpV6RLEwI5o" | |
}, | |
"source": [ | |
"Посмотрим, какие сущности есть: лекарства, форма лекарств, класс лекарств, показания к применению, побочки, и прочие болезни/симптомы.\n", | |
"\n", | |
"https://arxiv.org/abs/2004.03659\n", | |
"\n", | |
"* **DRUGNAME** Mentions of the brand name of a drug or product\n", | |
"ingredients/active compounds.\n", | |
"* **DRUGCLASS** Mentions of drug classes such as anti-inflammatory or\n", | |
"cardiovascular.\n", | |
"* **DRUGFORM** Mentions of routes of administration such as tablet\n", | |
"or liquid that describe the physical form in which\n", | |
"medication will be delivered into patient’s organism.\n", | |
"* **DI** Any indication/symptom that specifies the reason for\n", | |
"taking/prescribing the drug.\n", | |
"* **ADR** Mentions of untoward medical events that occur as a\n", | |
"consequence of drug intake and are not associated with\n", | |
"treated symptoms.\n", | |
"* **FINDING** Any DI or ADR that was not directly experienced by the\n", | |
"reporting patient or his/her family members, or related to\n", | |
"medical history/drug label, or any disease entities if the\n", | |
"annotator is not clear about type" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "vo8MIceYNPjx", | |
"outputId": "23d8f04f-f7d3-4a20-b840-a8b915b8ab47" | |
}, | |
"source": [ | |
"from collections import Counter, defaultdict\n", | |
"type2text = defaultdict(Counter)\n", | |
"ents = Counter()\n", | |
"for item in drugs:\n", | |
" for e in item.entities:\n", | |
" ents[e.entity_type] += 1\n", | |
" type2text[e.entity_type][e.entity_text] += 1\n", | |
"\n", | |
"for k, v in ents.most_common():\n", | |
" print(k, v)\n", | |
" print(type2text[k].most_common(3))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"DI 1401\n", | |
"[('простуды', 64), ('ОРВИ', 47), ('профилактики', 42)]\n", | |
"Drugname 1043\n", | |
"[('Виферон', 33), ('Анаферон', 25), ('Циклоферон', 24)]\n", | |
"Drugform 836\n", | |
"[('таблетки', 154), ('таблеток', 79), ('свечи', 63)]\n", | |
"ADR 720\n", | |
"[('аллергия', 16), ('слабость', 13), ('диарея', 12)]\n", | |
"Drugclass 330\n", | |
"[('противовирусный', 21), ('противовирусное', 18), ('противовирусных', 13)]\n", | |
"Finding 236\n", | |
"[('аллергии', 12), ('температуры', 6), ('сонливости', 5)]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 52 | |
}, | |
"id": "0Kszaqs8N0Ig", | |
"outputId": "34a697ef-c96d-40bb-c6ed-fe6f2c979ab1" | |
}, | |
"source": [ | |
"drugs[0].text" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
}, | |
"text/plain": [ | |
"'нам прописали, так мой ребенок сыпью покрылся, глаза опухли, сверху и снизу на веках высыпала сыпь, ( 8 месяцев сыну)А от виферона такого не было... У кого ещё такие побочки, отзовитесь!1 Чем спасались?\\n'" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "RzfPtOMoIrIu" | |
}, | |
"source": [ | |
"Напишем функцию, перекладывающую разметку сущностей на уровень слов. Будем использовать [IOB](https://en.wikipedia.org/wiki/Inside–outside–beginning_(tagging))-нотацию, чтобы разделять несколько сущностей одного типа, идущих подряд. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Dg9BL4Z_OcjY" | |
}, | |
"source": [ | |
"from razdel import tokenize\n", | |
"\n", | |
"def extract_labels(item):\n", | |
" raw_toks = list(tokenize(item.text))\n", | |
" words = [tok.text for tok in raw_toks]\n", | |
" word_labels = ['O'] * len(raw_toks)\n", | |
" char2word = [None] * len(item.text)\n", | |
" for i, word in enumerate(raw_toks):\n", | |
" char2word[word.start:word.stop] = [i] * len(word.text)\n", | |
"\n", | |
" for e in item.entities:\n", | |
" e_words = sorted({idx for idx in char2word[e.start:e.end] if idx is not None})\n", | |
" word_labels[e_words[0]] = 'B-' + e.entity_type\n", | |
" for idx in e_words[1:]:\n", | |
" word_labels[idx] = 'I-' + e.entity_type\n", | |
"\n", | |
" return {'tokens': words, 'tags': word_labels}" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "PCGwQAadOVA9", | |
"outputId": "cb55c0b3-bdc5-4b5c-feae-c560c38554cd" | |
}, | |
"source": [ | |
"print(extract_labels(drugs[0]))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"{'tokens': ['нам', 'прописали', ',', 'так', 'мой', 'ребенок', 'сыпью', 'покрылся', ',', 'глаза', 'опухли', ',', 'сверху', 'и', 'снизу', 'на', 'веках', 'высыпала', 'сыпь', ',', '(', '8', 'месяцев', 'сыну', ')', 'А', 'от', 'виферона', 'такого', 'не', 'было', '...', 'У', 'кого', 'ещё', 'такие', 'побочки', ',', 'отзовитесь', '!', '1', 'Чем', 'спасались', '?'], 'tags': ['O', 'O', 'O', 'O', 'O', 'O', 'B-ADR', 'I-ADR', 'O', 'B-ADR', 'I-ADR', 'O', 'O', 'O', 'O', 'B-ADR', 'I-ADR', 'I-ADR', 'I-ADR', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Drugform', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']}\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Chhlmjt8OEgn" | |
}, | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"ner_data = [extract_labels(item) for item in drugs]\n", | |
"ner_train, ner_test = train_test_split(ner_data, test_size=0.2, random_state=1)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "yvApziHbyUyR" | |
}, | |
"source": [ | |
"Пример данных" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 137 | |
}, | |
"id": "17yA19oFRwMk", | |
"outputId": "0f5322ef-f6cf-4099-a34c-d298a3a72f72" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"pd.options.display.max_colwidth = 300\n", | |
"pd.DataFrame(ner_train).sample(3)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tokens</th>\n", | |
" <th>tags</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>3132</th>\n", | |
" <td>[Но, в, 3, месяца, нам, ставили, гипертонус, ручек, и, ножек, .]</td>\n", | |
" <td>[O, O, O, O, O, O, B-DI, I-DI, I-DI, I-DI, O]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>355</th>\n", | |
" <td>[У, меня, двое, детей, .]</td>\n", | |
" <td>[O, O, O, O, O]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3101</th>\n", | |
" <td>[Не, спорю, наслышана, о, широте, его, применения, ,, но, нам, он, не, подошел, абсолютно, !]</td>\n", | |
" <td>[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tokens tags\n", | |
"3132 [Но, в, 3, месяца, нам, ставили, гипертонус, ручек, и, ножек, .] [O, O, O, O, O, O, B-DI, I-DI, I-DI, I-DI, O]\n", | |
"355 [У, меня, двое, детей, .] [O, O, O, O, O]\n", | |
"3101 [Не, спорю, наслышана, о, широте, его, применения, ,, но, нам, он, не, подошел, абсолютно, !] [O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 12 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "sE0souTBykq1" | |
}, | |
"source": [ | |
"Соберём все виды меток в список. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "16SRNc6csJBC", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "a06bc57e-5b17-4b45-8009-a7cbfc5e7592" | |
}, | |
"source": [ | |
"label_list = sorted({label for item in ner_train for label in item['tags']})\n", | |
"if 'O' in label_list:\n", | |
" label_list.remove('O')\n", | |
" label_list = ['O'] + label_list\n", | |
"label_list" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['O',\n", | |
" 'B-ADR',\n", | |
" 'B-DI',\n", | |
" 'B-Drugclass',\n", | |
" 'B-Drugform',\n", | |
" 'B-Drugname',\n", | |
" 'B-Finding',\n", | |
" 'I-ADR',\n", | |
" 'I-DI',\n", | |
" 'I-Drugclass',\n", | |
" 'I-Drugform',\n", | |
" 'I-Drugname',\n", | |
" 'I-Finding']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 13 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "ckjbVWLoyYYf" | |
}, | |
"source": [ | |
"Сложим наши данные в объект [`DatasetDict`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasetdict), нативный для huggingface." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "4E3yy6wmUp-z" | |
}, | |
"source": [ | |
"from datasets import Dataset, DatasetDict" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "3YavIR4eU5ZY", | |
"outputId": "8b2ae069-88ac-405a-83a8-ff4a73d7d215" | |
}, | |
"source": [ | |
"ner_data = DatasetDict({\n", | |
" 'train': Dataset.from_pandas(pd.DataFrame(ner_train)),\n", | |
" 'test': Dataset.from_pandas(pd.DataFrame(ner_test))\n", | |
"})\n", | |
"ner_data" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"DatasetDict({\n", | |
" train: Dataset({\n", | |
" features: ['tokens', 'tags'],\n", | |
" num_rows: 3847\n", | |
" })\n", | |
" test: Dataset({\n", | |
" features: ['tokens', 'tags'],\n", | |
" num_rows: 962\n", | |
" })\n", | |
"})" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 15 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "n9qywopnIrJH" | |
}, | |
"source": [ | |
"## Preprocessing the data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "YVx71GdAIrJH" | |
}, | |
"source": [ | |
"Before we can feed those texts to our model, we need to preprocess them. This is done by a 🤗 Transformers `Tokenizer` which will (as the name indicates) tokenize the inputs (including converting the tokens to their corresponding IDs in the pretrained vocabulary) and put them in a format the model expects, as well as generate the other inputs that the model requires.\n", | |
"\n", | |
"To do all of this, we instantiate our tokenizer with the `AutoTokenizer.from_pretrained` method, which will ensure:\n", | |
"\n", | |
"- we get a tokenizer that corresponds to the model architecture we want to use,\n", | |
"- we download the vocabulary used when pretraining this specific checkpoint.\n", | |
"\n", | |
"That vocabulary will be cached, so it's not downloaded again the next time we run the cell." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "eXNLu_-nIrJI", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 177, | |
"referenced_widgets": [ | |
"38effe6811e0445ea6a06fbf62322cb2", | |
"9c3e627f3f214708a0eada8855e345e1", | |
"4b4eaf184a9a482798ac08ff5febafb9", | |
"c3566754043543218423794f629214d6", | |
"b92ff60477c043e0b2bdaea88a70295b", | |
"5e07fb58967a4d03931682646417f324", | |
"c8847e2fd0654eabb62848c927685ba4", | |
"bb59a31387e84bc18937ca0aafb36230", | |
"7acb711b2f994c0c8c292f7735ccfd15", | |
"93c12e9ea0294f70b4dca310239b55e4", | |
"44595961fd42457496803732f50e813b", | |
"97db5fbd0b134002a15f1e89b2e2a871", | |
"4ac3dbe72f9b4dd2b27d91630736fbcc", | |
"937d10a0f080492ab64efbb53f6ebe83", | |
"3dab395a7da545838fb6c54fc19420cc", | |
"712ed73184ec4551a6891862b3295b22", | |
"5281ff75b7144218a6218c65d8789f48", | |
"bedec2ca0adc4239b35f94a23a9b3cdd", | |
"8263aecd8ead4ceabf6112e536ac6dbd", | |
"e7f42df803cc4ed6ae43ead43e283ed8", | |
"57770139538d40c38c68112f622993f3", | |
"c3caeafc16f44882938d7c1dcb56cb18", | |
"0389bbf4dde74c4db0117a6f66c8808f", | |
"bb94580fba5140078f7a4ac289308e6f", | |
"ff1d724506ef4ab1a73c4a6635a95cdb", | |
"6568a4b7ebba4b66ae324cfb1e4c6c56", | |
"37f3d4726084402ba1ff26773197b415", | |
"6f02d9d6f80249e4949123b465d56584", | |
"f5e2e39b8c344f81b40f43c7645b6fae", | |
"2f8e9faf6afe4b278a4889a13e29fd68", | |
"ff12a26f63e643fbb301dffe15f375cf", | |
"2fd0ded25025401d917bc2624b23a5ce", | |
"fd461fa9ee1249dbb49e10bf7201f9d9", | |
"91e9fdb905864efa8d42ee7cb3680e08", | |
"d8aa99a5f7cb42b9b3ca28edb0b0a007", | |
"0f4c5d11ce814b83bb85389b3c5a4c5f", | |
"62ae82032a8a403b989ee8e0ab06f58a", | |
"89ccf6c81454461c9a94ee0b9820d4a9", | |
"c76403b23ddd4fd5a644e63e21da8358", | |
"303a6382c4ff45439a40c47e969306da", | |
"24bff1add1804f5ea46b5edaf4ca7428", | |
"6e2ff1b9214049188ac0587fb33b5352", | |
"5f25f5e5ad604408bcb53d0b70f67f20", | |
"76260400334d4c4f8985e69b4800ae28", | |
"21fdf7eb6fb94da0bc9639b3f4ea7f00", | |
"20b1444148dc40f89b107e30dbcf6c0b", | |
"36dc0d7a19c04a47a06618e187ee894a", | |
"791e009541a64d749330e6123ca7d87f", | |
"06c63accbd2a4903b762ed21545bfbbe", | |
"07fee35962004c8996c8acef923292eb", | |
"b3332e5f66ad4c6c830f28bc290cd4bd", | |
"b831ba8c276b4a1bb0ef7ae16a7a8fc9", | |
"7eff20962dfe422b9523c4e74f5372aa", | |
"663c10e13a0e47f7b115ad50bd5b3965", | |
"997504803ac5445588c07cf97049d14a" | |
] | |
}, | |
"outputId": "61183128-8efa-44c0-83e5-1bddd1d0fc06" | |
}, | |
"source": [ | |
"from transformers import AutoTokenizer\n", | |
" \n", | |
"tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "38effe6811e0445ea6a06fbf62322cb2", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"Downloading: 0%| | 0.00/341 [00:00<?, ?B/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "97db5fbd0b134002a15f1e89b2e2a871", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"Downloading: 0%| | 0.00/632 [00:00<?, ?B/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "0389bbf4dde74c4db0117a6f66c8808f", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"Downloading: 0%| | 0.00/241k [00:00<?, ?B/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "91e9fdb905864efa8d42ee7cb3680e08", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"Downloading: 0%| | 0.00/468k [00:00<?, ?B/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "21fdf7eb6fb94da0bc9639b3f4ea7f00", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"Downloading: 0%| | 0.00/112 [00:00<?, ?B/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "rowT4iCLIrJK" | |
}, | |
"source": [ | |
"You can directly call this tokenizer on one sentence:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "a5hBlsrHIrJL", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "685cf29d-15c9-40ac-e802-ae3035b0ca14" | |
}, | |
"source": [ | |
"tokenizer(\"Hello, this is one sentence!\")" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'input_ids': [2, 9944, 16, 881, 550, 835, 15503, 5, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 17 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "ZdStg37nsJBE" | |
}, | |
"source": [ | |
"Depending on the model you selected, you will see different keys in the dictionary returned by the cell above. They don't matter much for what we're doing here (just know they are required by the model we will instantiate later), you can learn more about them in [this tutorial](https://huggingface.co/transformers/preprocessing.html) if you're interested.\n", | |
"\n", | |
"If, as is the case here, your inputs have already been split into words, you should pass the list of words to your tokenizer with the argument `is_split_into_words=True`:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "b_yJ2hgDsJBF", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "211c06e6-ca98-46db-eac3-0f4c6809a400" | |
}, | |
"source": [ | |
"tokenizer([\"Hello\", \",\", \"this\", \"is\", \"one\", \"sentence\", \"split\", \"into\", \"words\", \".\"], is_split_into_words=True)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'input_ids': [2, 9944, 16, 881, 550, 835, 15503, 7440, 996, 6301, 18, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 18 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "4JdDuFvbsJBF" | |
}, | |
"source": [ | |
"Note that transformers are often pretrained with subword tokenizers, meaning that even if your inputs have been split into words already, each of those words could be split again by the tokenizer. Let's look at an example of that:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OjrkjteOsJBF", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "a2841a74-4493-4417-cad8-e53d87875542" | |
}, | |
"source": [ | |
"example = ner_train[5]\n", | |
"print(example[\"tokens\"])" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"['Мы', 'поменяли', 'место', 'жительства', 'и', 'перевели', 'дочь', 'в', 'школу', ',', 'которая', 'находится', 'ближе', 'к', 'дому', '.']\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "QU8fkdJMsJBF", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "9eec8122-4df6-43d8-8b53-52b296e1b640" | |
}, | |
"source": [ | |
"tokenized_input = tokenizer(example[\"tokens\"], is_split_into_words=True)\n", | |
"tokens = tokenizer.convert_ids_to_tokens(tokenized_input[\"input_ids\"])\n", | |
"print(tokens)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"['[CLS]', 'Мы', 'пом', '##ен', '##яли', 'место', 'ж', '##итель', '##ства', 'и', 'пер', '##еве', '##ли', 'дочь', 'в', 'школу', ',', 'которая', 'находится', 'б', '##ли', '##же', 'к', 'дому', '.', '[SEP]']\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "-7zwzY9wsJBG" | |
}, | |
"source": [ | |
"Чтобы перейти с уровня слов на уровень subword tokens, нужно ещё раз предобработать тексты." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "F39uz6wusJBG", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "b0d90db2-f812-4478-bb30-b57235a6a204" | |
}, | |
"source": [ | |
"len(example[\"tags\"]), len(tokenized_input[\"input_ids\"])" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(16, 26)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 21 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "pnBazSrTsJBG" | |
}, | |
"source": [ | |
"Thankfully, the tokenizer returns outputs that have a `word_ids` method which can help us." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Rt7_5_bXsJBH", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "90d5e41e-92e4-4199-bcac-f71e2dd3488c" | |
}, | |
"source": [ | |
"print(tokenized_input.word_ids())" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[None, 0, 1, 1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 13, 14, 15, None]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "rP1PvW2isJBH" | |
}, | |
"source": [ | |
"As we can see, it returns a list with the same number of elements as our processed input ids, mapping special tokens to `None` and all other tokens to their respective word. This way, we can align the labels with the processed input ids." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NeVhtoANsJBH", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "dcc7ae1b-6d60-4abb-c33f-ba9bad0bd859" | |
}, | |
"source": [ | |
"word_ids = tokenized_input.word_ids()\n", | |
"aligned_labels = [-100 if i is None else example[\"tags\"][i] for i in word_ids]\n", | |
"print(len(aligned_labels), len(tokenized_input[\"input_ids\"]))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"26 26\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "MM4fgSPDsJBH" | |
}, | |
"source": [ | |
"Here we set the labels of all special tokens to -100 (the index that is ignored by PyTorch) and the labels of all other tokens to the label of the word they come from. Another strategy is to set the label only on the first token obtained from a given word, and give a label of -100 to the other subtokens from the same word. We propose the two strategies here, just change the flag `label_all_tokens`." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "2C0hcmp9IrJQ" | |
}, | |
"source": [ | |
"We're now ready to write the function that will preprocess our samples. We feed them to the `tokenizer` with the argument `truncation=True` (to truncate texts that are bigger than the maximum size allowed by the model) and `is_split_into_words=True` (as seen above). Then we align the labels with the token ids using the strategy we picked:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vc0BSBLIIrJQ" | |
}, | |
"source": [ | |
"def tokenize_and_align_labels(examples, label_all_tokens=True):\n", | |
" tokenized_inputs = tokenizer(examples[\"tokens\"], truncation=True, is_split_into_words=True)\n", | |
"\n", | |
" labels = []\n", | |
" for i, label in enumerate(examples['tags']):\n", | |
" word_ids = tokenized_inputs.word_ids(batch_index=i)\n", | |
" previous_word_idx = None\n", | |
" label_ids = []\n", | |
" for word_idx in word_ids:\n", | |
" # Special tokens have a word id that is None. We set the label to -100 so they are automatically\n", | |
" # ignored in the loss function.\n", | |
" if word_idx is None:\n", | |
" label_ids.append(-100)\n", | |
" # We set the label for the first token of each word.\n", | |
" elif word_idx != previous_word_idx:\n", | |
" label_ids.append(label[word_idx])\n", | |
" # For the other tokens in a word, we set the label to either the current label or -100, depending on\n", | |
" # the label_all_tokens flag.\n", | |
" else:\n", | |
" label_ids.append(label[word_idx] if label_all_tokens else -100)\n", | |
" previous_word_idx = word_idx\n", | |
"\n", | |
" label_ids = [label_list.index(idx) if isinstance(idx, str) else idx for idx in label_ids]\n", | |
"\n", | |
" labels.append(label_ids)\n", | |
"\n", | |
" tokenized_inputs[\"labels\"] = labels\n", | |
" return tokenized_inputs" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "0lm8ozrJIrJR" | |
}, | |
"source": [ | |
"This function works with one or several examples. In the case of several examples, the tokenizer will return a list of lists for each key:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "-b70jh26IrJS", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "7d76f9ad-a433-444f-b118-2bdb9d2013cd" | |
}, | |
"source": [ | |
"tokenize_and_align_labels(ner_data['train'][22:23])" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'input_ids': [[2, 1041, 4033, 3236, 9267, 331, 19173, 19106, 26629, 1887, 22018, 548, 22276, 320, 21538, 16, 705, 13718, 22264, 548, 18397, 14063, 11137, 626, 16296, 24531, 18, 3]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'labels': [[-100, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]]}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 25 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "zS-6iXTkIrJT" | |
}, | |
"source": [ | |
"To apply this function on all the sentences (or pairs of sentences) in our dataset, we just use the `map` method of the `ner_data` object we created earlier. This will apply the function on all the elements of all the splits in `ner_data`, so our training and testing data will be preprocessed in one single command." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "DDtsaJeVIrJT", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 81, | |
"referenced_widgets": [ | |
"e16827a6f03a4b92889daf18d9917126", | |
"7b3185264cfe469683ad9cc81b0a8484", | |
"20ea5f6227b041969b1ce0d686a39121", | |
"78818c7330fd489c8820d845afab2fca", | |
"19c53fdf63a0408e8ad85e56a94d7dcd", | |
"43d9892c92f343369d714477c706d45a", | |
"91f00c3eed1b41d288a53cf829f88555", | |
"f2f2a65d8c5d4627855526eccf8c68d7", | |
"2c4a9988fa90474ba9aa1f48bf03704a", | |
"ec76c271eccd45c7b8d28a15274b1d50", | |
"eefb8e1e66ff486e92c0ce618c12be66", | |
"b27b3845581c4dcba258672ecde20982", | |
"16091398feea4bfb8e5c07a679934a3e", | |
"b6f776fb81874175a00f9d4569edf89c", | |
"9ae961550c684fbda20bbec6043ca80e", | |
"54e1fd908fed41c981e8bb39068da20c", | |
"c46c29c17e5d41128bb8bc401c5c4c8c", | |
"d0470494b6be4bf0af4fc7007e285149", | |
"9c406cfcfcf9424181939d2f6009090a", | |
"d2e8a291b9c54a45ba9f812ec6d19fcc", | |
"6fb72f595fb14608ae7e6a20c2e410a9", | |
"e174a5cbd06441329ccd8cb547b44503" | |
] | |
}, | |
"outputId": "b600760b-c8ce-4744-b0bc-fa777a6e9331" | |
}, | |
"source": [ | |
"tokenized_datasets = ner_data.map(tokenize_and_align_labels, batched=True)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "e16827a6f03a4b92889daf18d9917126", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
" 0%| | 0/4 [00:00<?, ?ba/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "b27b3845581c4dcba258672ecde20982", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
" 0%| | 0/1 [00:00<?, ?ba/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "voWiw8C7IrJV" | |
}, | |
"source": [ | |
"Even better, the results are automatically cached by the 🤗 Datasets library to avoid spending time on this step the next time you run your notebook. The 🤗 Datasets library is normally smart enough to detect when the function you pass to map has changed (and thus requires to not use the cache data). For instance, it will properly detect if you change the task in the first cell and rerun the notebook. 🤗 Datasets warns you when it uses cached files, you can pass `load_from_cache_file=False` in the call to `map` to not use the cached files and force the preprocessing to be applied again.\n", | |
"\n", | |
"Note that we passed `batched=True` to encode the texts by batches together. This is to leverage the full benefit of the fast tokenizer we loaded earlier, which will use multi-threading to treat the texts in a batch concurrently." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "545PP3o8IrJV" | |
}, | |
"source": [ | |
"## Fine-tuning the model" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "FBiW8UpKIrJW" | |
}, | |
"source": [ | |
"Now that our data is ready, we can download the pretrained model and fine-tune it. Since our task is token classification, we use the `AutoModelForTokenClassification` class. Like with the tokenizer, the `from_pretrained` method will download and cache the model for us. The only thing we have to specify is the number of labels for our problem (which is the length of the `label_list` we built earlier):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "4d2zEASrUs1E", | |
"outputId": "7f0158f6-0aae-4fac-a11c-ad4996558bfc" | |
}, | |
"source": [ | |
"label_list" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['O',\n", | |
" 'B-ADR',\n", | |
" 'B-DI',\n", | |
" 'B-Drugclass',\n", | |
" 'B-Drugform',\n", | |
" 'B-Drugname',\n", | |
" 'B-Finding',\n", | |
" 'I-ADR',\n", | |
" 'I-DI',\n", | |
" 'I-Drugclass',\n", | |
" 'I-Drugform',\n", | |
" 'I-Drugname',\n", | |
" 'I-Finding']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 28 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "TlqNaB8jIrJW", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 152, | |
"referenced_widgets": [ | |
"178b2e70a03141c3a8d14c03b1024d34", | |
"a2a75ec023b64fb0813a0d3b9da549a3", | |
"6446a516afdf4406a5a8c876aa2a0179", | |
"787dde6c71aa4303a3f1ae908bdf3288", | |
"04e795570e2245e2844e7acfd36611e4", | |
"a0441eab19d441d7ae60ea926a1ddabe", | |
"00a1bad590e34fc09067414ed4bae1d9", | |
"5b3223766ec44a0781e88c6f0d276c12", | |
"085cbbc409874d57b38930b6b05ecfd9", | |
"1a255ac062e94624a2ecfb4f58889d74", | |
"1d51207792fd4afb849e2ae72ddd68ce" | |
] | |
}, | |
"outputId": "4adff280-2147-4280-aff7-25b4b22c22f1" | |
}, | |
"source": [ | |
"from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer\n", | |
"\n", | |
"model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=len(label_list))\n", | |
"model.config.id2label = dict(enumerate(label_list))\n", | |
"model.config.label2id = {v: k for k, v in model.config.id2label.items()}" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "178b2e70a03141c3a8d14c03b1024d34", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"Downloading: 0%| | 0.00/47.7M [00:00<?, ?B/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Some weights of the model checkpoint at cointegrated/rubert-tiny were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']\n", | |
"- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", | |
"- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", | |
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at cointegrated/rubert-tiny and are newly initialized: ['classifier.weight', 'classifier.bias']\n", | |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "CczA5lJlIrJX" | |
}, | |
"source": [ | |
"The warning is telling us we are throwing away some weights (the `vocab_transform` and `vocab_layer_norm` layers) and randomly initializing some other (the `pre_classifier` and `classifier` layers). This is absolutely normal in this case, because we are removing the head used to pretrain the model on a masked language modeling objective and replacing it with a new head for which we don't have pretrained weights, so the library warns us we should fine-tune this model before using it for inference, which is exactly what we are going to do." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "_N8urzhyIrJY" | |
}, | |
"source": [ | |
"To instantiate a `Trainer`, we will need to define three more things. The most important is the [`TrainingArguments`](https://huggingface.co/transformers/main_classes/trainer.html#transformers.TrainingArguments), which is a class that contains all the attributes to customize the training. It requires one folder name, which will be used to save the checkpoints of the model, and all other arguments are optional:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Bliy8zgjIrJY" | |
}, | |
"source": [ | |
"args = TrainingArguments(\n", | |
" \"ner\",\n", | |
" evaluation_strategy = \"epoch\",\n", | |
" learning_rate=2e-5,\n", | |
" per_device_train_batch_size=batch_size,\n", | |
" per_device_eval_batch_size=batch_size,\n", | |
" num_train_epochs=10,\n", | |
" weight_decay=0.01,\n", | |
" save_strategy='no',\n", | |
" report_to='none',\n", | |
")" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "km3pGVdTIrJc" | |
}, | |
"source": [ | |
"Here we set the evaluation to be done at the end of each epoch, tweak the learning rate, use the `batch_size` defined at the top of the notebook and customize the number of epochs for training, as well as the weight decay." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "4e6jrE3TsJBM" | |
}, | |
"source": [ | |
"Then we will need a data collator that will batch our processed examples together while applying padding to make them all the same size (each pad will be padded to the length of its longest example). There is a data collator for this task in the Transformers library, that not only pads the inputs, but also the labels:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "pyiUUwuCsJBM" | |
}, | |
"source": [ | |
"from transformers import DataCollatorForTokenClassification\n", | |
"\n", | |
"data_collator = DataCollatorForTokenClassification(tokenizer)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "YY7DtOOesJBM" | |
}, | |
"source": [ | |
"The last thing to define for our `Trainer` is how to compute the metrics from the predictions. Here we will load the [`seqeval`](https://github.com/chakki-works/seqeval) metric (which is commonly used to evaluate results on the CONLL dataset) via the Datasets library." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qFF2_ArssJBM", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 49, | |
"referenced_widgets": [ | |
"bba28fea430d436981b0bfab06fb4ee6", | |
"fe16833673ba4dc79001d9b3d28eb6d2", | |
"513e9fa4a8a04f889d68ac7658818465", | |
"b38e9ab8c6fa4ea1952a2265dcdcfff5", | |
"91c93a7307854ac98d9bdf6746286517", | |
"c291de1c9ba343ff8e140a8c7ef1496a", | |
"b1e72abd13c84f08b763eae36928fb4e", | |
"251fc9cb4f574aa4881b722cef11324a", | |
"71c5d3acf8674d77b38d6fa6b0aba8d5", | |
"94419c78831745928f8e5327f53ef5e0", | |
"31a5bb08896f47309e658dac697b6680" | |
] | |
}, | |
"outputId": "d0e04cc6-0334-48a3-851e-9688ee5127cf" | |
}, | |
"source": [ | |
"metric = load_metric(\"seqeval\")" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "bba28fea430d436981b0bfab06fb4ee6", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"Downloading: 0%| | 0.00/2.48k [00:00<?, ?B/s]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Ennxn1jysJBM" | |
}, | |
"source": [ | |
"This metric takes list of labels for the predictions and references:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "YOfoAVULsJBN", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "93aa3610-aa5b-4318-c3d2-02171e9596e9" | |
}, | |
"source": [ | |
"example = ner_train[4]\n", | |
"labels = example['tags']\n", | |
"metric.compute(predictions=[labels], references=[labels])" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'DI': {'f1': 1.0, 'number': 1, 'precision': 1.0, 'recall': 1.0},\n", | |
" 'Drugform': {'f1': 1.0, 'number': 2, 'precision': 1.0, 'recall': 1.0},\n", | |
" 'overall_accuracy': 1.0,\n", | |
" 'overall_f1': 1.0,\n", | |
" 'overall_precision': 1.0,\n", | |
" 'overall_recall': 1.0}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 32 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "7sZOdRlRIrJd" | |
}, | |
"source": [ | |
"So we will need to do a bit of post-processing on our predictions:\n", | |
"- select the predicted index (with the maximum logit) for each token\n", | |
"- convert it to its string label\n", | |
"- ignore everywhere we set a label of -100\n", | |
"\n", | |
"The following function does all this post-processing on the result of `Trainer.evaluate` (which is a namedtuple containing predictions and labels) before applying the metric:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "UmvbnJ9JIrJd" | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"def compute_metrics(p):\n", | |
" predictions, labels = p\n", | |
" predictions = np.argmax(predictions, axis=2)\n", | |
"\n", | |
" # Remove ignored index (special tokens)\n", | |
" true_predictions = [\n", | |
" [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", | |
" for prediction, label in zip(predictions, labels)\n", | |
" ]\n", | |
" true_labels = [\n", | |
" [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n", | |
" for prediction, label in zip(predictions, labels)\n", | |
" ]\n", | |
"\n", | |
" results = metric.compute(predictions=true_predictions, references=true_labels, zero_division=0)\n", | |
" return {\n", | |
" \"precision\": results[\"overall_precision\"],\n", | |
" \"recall\": results[\"overall_recall\"],\n", | |
" \"f1\": results[\"overall_f1\"],\n", | |
" \"accuracy\": results[\"overall_accuracy\"],\n", | |
" }" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "rXuFTAzDIrJe" | |
}, | |
"source": [ | |
"Note that we drop the precision/recall/f1 computed for each category and only focus on the overall precision/recall/f1/accuracy.\n", | |
"\n", | |
"Then we just need to pass all of this along with our datasets to the `Trainer`:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "imY1oC3SIrJf" | |
}, | |
"source": [ | |
"trainer = Trainer(\n", | |
" model,\n", | |
" args,\n", | |
" train_dataset=tokenized_datasets[\"train\"],\n", | |
" eval_dataset=tokenized_datasets[\"test\"],\n", | |
" data_collator=data_collator,\n", | |
" tokenizer=tokenizer,\n", | |
" compute_metrics=compute_metrics\n", | |
")" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 238 | |
}, | |
"id": "ZP1dvvNlXg9Y", | |
"outputId": "5615b5dc-3794-48d4-af11-12d9a6a0eac6" | |
}, | |
"source": [ | |
"trainer.evaluate()" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, tags.\n", | |
"***** Running Evaluation *****\n", | |
" Num examples = 962\n", | |
" Batch size = 16\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" \n", | |
" <progress value='61' max='61' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" [61/61 00:00]\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'eval_accuracy': 0.07571226846083562,\n", | |
" 'eval_f1': 0.03137110167927662,\n", | |
" 'eval_loss': 2.604278326034546,\n", | |
" 'eval_precision': 0.018480269594521145,\n", | |
" 'eval_recall': 0.10372178157413056,\n", | |
" 'eval_runtime': 1.5067,\n", | |
" 'eval_samples_per_second': 638.492,\n", | |
" 'eval_steps_per_second': 40.486}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 35 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "a-xw5JvKzyrf" | |
}, | |
"source": [ | |
"В начале обучения заморозим все параметры в модели, кроме последнего слоя, и посмотрим, насколько хорошо она обучится." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "lzwwl_YQWKxq" | |
}, | |
"source": [ | |
"for param in model.bert.parameters():\n", | |
" param.requires_grad = False" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "EhRisAHxWZRG", | |
"outputId": "95232006-ea19-46f8-a893-4fdfa44805f1" | |
}, | |
"source": [ | |
"for name, param in model.named_parameters():\n", | |
" if param.requires_grad:\n", | |
" print(name)\n", | |
" print(param)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"classifier.weight\n", | |
"Parameter containing:\n", | |
"tensor([[-5.3295e-02, 8.1591e-05, -1.4091e-02, ..., 9.4435e-03,\n", | |
" 2.6371e-02, -2.7459e-02],\n", | |
" [-1.4154e-02, 1.8980e-02, -6.4149e-03, ..., -3.0063e-02,\n", | |
" -8.0335e-03, -1.3474e-02],\n", | |
" [ 3.9226e-03, -1.7339e-03, -2.4043e-03, ..., 1.1911e-02,\n", | |
" -6.8623e-03, -3.6764e-02],\n", | |
" ...,\n", | |
" [ 2.9699e-02, -2.5830e-02, 2.9956e-03, ..., 2.0724e-02,\n", | |
" 2.6304e-02, -1.3127e-04],\n", | |
" [-2.8258e-02, 1.9521e-03, -1.2629e-02, ..., -2.4292e-02,\n", | |
" -1.9133e-02, 3.5226e-02],\n", | |
" [ 4.8563e-03, -3.9019e-02, 2.2573e-02, ..., 2.3094e-02,\n", | |
" -5.4334e-03, -3.1281e-02]], device='cuda:0', requires_grad=True)\n", | |
"classifier.bias\n", | |
"Parameter containing:\n", | |
"tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',\n", | |
" requires_grad=True)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "CdzABDVcIrJg" | |
}, | |
"source": [ | |
"We can now finetune our model by just calling the `train` method:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nsuTXCjMeYHE" | |
}, | |
"source": [ | |
"import logging\n", | |
"from transformers.trainer import logger as noisy_logger\n", | |
"noisy_logger.setLevel(logging.WARNING)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "yGz3c_A_sJBO", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 429 | |
}, | |
"outputId": "187b4d00-27fc-464b-da73-9fd47e2dc862" | |
}, | |
"source": [ | |
"trainer.train()" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" \n", | |
" <progress value='2410' max='2410' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" [2410/2410 00:31, Epoch 10/10]\n", | |
" </div>\n", | |
" <table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>Epoch</th>\n", | |
" <th>Training Loss</th>\n", | |
" <th>Validation Loss</th>\n", | |
" <th>Precision</th>\n", | |
" <th>Recall</th>\n", | |
" <th>F1</th>\n", | |
" <th>Accuracy</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>No log</td>\n", | |
" <td>2.034866</td>\n", | |
" <td>0.032157</td>\n", | |
" <td>0.059487</td>\n", | |
" <td>0.041747</td>\n", | |
" <td>0.630991</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>No log</td>\n", | |
" <td>1.594469</td>\n", | |
" <td>0.042105</td>\n", | |
" <td>0.004881</td>\n", | |
" <td>0.008748</td>\n", | |
" <td>0.815724</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>2.043100</td>\n", | |
" <td>1.282439</td>\n", | |
" <td>0.052632</td>\n", | |
" <td>0.000305</td>\n", | |
" <td>0.000607</td>\n", | |
" <td>0.826314</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>2.043100</td>\n", | |
" <td>1.079169</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.826854</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>1.267200</td>\n", | |
" <td>0.954540</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.826896</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>1.267200</td>\n", | |
" <td>0.880644</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.826896</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>0.932500</td>\n", | |
" <td>0.837882</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.826896</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>0.932500</td>\n", | |
" <td>0.813664</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.826896</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>0.808700</td>\n", | |
" <td>0.801121</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.826896</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>10</td>\n", | |
" <td>0.808700</td>\n", | |
" <td>0.797258</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.826896</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table><p>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" \n", | |
" <progress value='122' max='61' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" [61/61 00:53]\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"TrainOutput(global_step=2410, training_loss=1.181212188594074, metrics={'train_runtime': 31.5523, 'train_samples_per_second': 1219.246, 'train_steps_per_second': 76.381, 'total_flos': 35752217175750.0, 'train_loss': 1.181212188594074, 'epoch': 10.0})" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 39 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "H14j1R3cbDPO" | |
}, | |
"source": [ | |
"Модель недообучилась: похоже, что нужно обучить больше слоёв. Разморозим их все (но, воможно, более правильно было бы разморозить лишь несколько верхних), и поучимся ещё эпох 20." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "65soVR9sbE77" | |
}, | |
"source": [ | |
"# разморозка\n", | |
"for param in model.parameters():\n", | |
" param.requires_grad = True" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "u-3sfj5ocug0", | |
"outputId": "32b7bd21-4999-4f15-e1b8-cfd60dcdf4f8" | |
}, | |
"source": [ | |
"args = TrainingArguments(\n", | |
" \"ner\",\n", | |
" evaluation_strategy = \"epoch\",\n", | |
" learning_rate=1e-5,\n", | |
" per_device_train_batch_size=batch_size,\n", | |
" per_device_eval_batch_size=batch_size,\n", | |
" num_train_epochs=20,\n", | |
" weight_decay=0.01,\n", | |
" save_strategy='no',\n", | |
" report_to='none',\n", | |
")" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"PyTorch: setting up devices\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "wGW0r33pdLOy" | |
}, | |
"source": [ | |
"trainer = Trainer(\n", | |
" model,\n", | |
" args,\n", | |
" train_dataset=tokenized_datasets[\"train\"],\n", | |
" eval_dataset=tokenized_datasets[\"test\"],\n", | |
" data_collator=data_collator,\n", | |
" tokenizer=tokenizer,\n", | |
" compute_metrics=compute_metrics\n", | |
")" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 706 | |
}, | |
"id": "C5nZBs-BbFRq", | |
"outputId": "61effef3-dff8-4296-f3d2-00dedd88011f" | |
}, | |
"source": [ | |
"trainer.train()" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" \n", | |
" <progress value='4820' max='4820' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" [4820/4820 01:51, Epoch 20/20]\n", | |
" </div>\n", | |
" <table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>Epoch</th>\n", | |
" <th>Training Loss</th>\n", | |
" <th>Validation Loss</th>\n", | |
" <th>Precision</th>\n", | |
" <th>Recall</th>\n", | |
" <th>F1</th>\n", | |
" <th>Accuracy</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>No log</td>\n", | |
" <td>0.584139</td>\n", | |
" <td>0.703590</td>\n", | |
" <td>0.209274</td>\n", | |
" <td>0.322596</td>\n", | |
" <td>0.851981</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>No log</td>\n", | |
" <td>0.516597</td>\n", | |
" <td>0.603892</td>\n", | |
" <td>0.321843</td>\n", | |
" <td>0.419900</td>\n", | |
" <td>0.863070</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>0.571100</td>\n", | |
" <td>0.474432</td>\n", | |
" <td>0.609095</td>\n", | |
" <td>0.384076</td>\n", | |
" <td>0.471094</td>\n", | |
" <td>0.871252</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>0.571100</td>\n", | |
" <td>0.446405</td>\n", | |
" <td>0.624401</td>\n", | |
" <td>0.437157</td>\n", | |
" <td>0.514265</td>\n", | |
" <td>0.878769</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>0.446100</td>\n", | |
" <td>0.423940</td>\n", | |
" <td>0.619102</td>\n", | |
" <td>0.496339</td>\n", | |
" <td>0.550965</td>\n", | |
" <td>0.885414</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>0.446100</td>\n", | |
" <td>0.405271</td>\n", | |
" <td>0.620240</td>\n", | |
" <td>0.536608</td>\n", | |
" <td>0.575401</td>\n", | |
" <td>0.889733</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>0.387700</td>\n", | |
" <td>0.391646</td>\n", | |
" <td>0.630487</td>\n", | |
" <td>0.556437</td>\n", | |
" <td>0.591152</td>\n", | |
" <td>0.893222</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>0.387700</td>\n", | |
" <td>0.381404</td>\n", | |
" <td>0.606738</td>\n", | |
" <td>0.587858</td>\n", | |
" <td>0.597149</td>\n", | |
" <td>0.894468</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>0.349000</td>\n", | |
" <td>0.374620</td>\n", | |
" <td>0.603774</td>\n", | |
" <td>0.615009</td>\n", | |
" <td>0.609340</td>\n", | |
" <td>0.895922</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>10</td>\n", | |
" <td>0.349000</td>\n", | |
" <td>0.364899</td>\n", | |
" <td>0.621263</td>\n", | |
" <td>0.615009</td>\n", | |
" <td>0.618120</td>\n", | |
" <td>0.898787</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>11</td>\n", | |
" <td>0.320000</td>\n", | |
" <td>0.356865</td>\n", | |
" <td>0.638978</td>\n", | |
" <td>0.610128</td>\n", | |
" <td>0.624220</td>\n", | |
" <td>0.900573</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>12</td>\n", | |
" <td>0.320000</td>\n", | |
" <td>0.353724</td>\n", | |
" <td>0.621075</td>\n", | |
" <td>0.627517</td>\n", | |
" <td>0.624279</td>\n", | |
" <td>0.900365</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>13</td>\n", | |
" <td>0.304200</td>\n", | |
" <td>0.351088</td>\n", | |
" <td>0.612875</td>\n", | |
" <td>0.641855</td>\n", | |
" <td>0.627030</td>\n", | |
" <td>0.900947</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>14</td>\n", | |
" <td>0.304200</td>\n", | |
" <td>0.344875</td>\n", | |
" <td>0.635614</td>\n", | |
" <td>0.634838</td>\n", | |
" <td>0.635226</td>\n", | |
" <td>0.903273</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>15</td>\n", | |
" <td>0.290300</td>\n", | |
" <td>0.343057</td>\n", | |
" <td>0.632229</td>\n", | |
" <td>0.640329</td>\n", | |
" <td>0.636253</td>\n", | |
" <td>0.903107</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>16</td>\n", | |
" <td>0.290300</td>\n", | |
" <td>0.340833</td>\n", | |
" <td>0.637323</td>\n", | |
" <td>0.644905</td>\n", | |
" <td>0.641092</td>\n", | |
" <td>0.903896</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>17</td>\n", | |
" <td>0.278900</td>\n", | |
" <td>0.338196</td>\n", | |
" <td>0.647566</td>\n", | |
" <td>0.641245</td>\n", | |
" <td>0.644390</td>\n", | |
" <td>0.904892</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>18</td>\n", | |
" <td>0.278900</td>\n", | |
" <td>0.337507</td>\n", | |
" <td>0.638947</td>\n", | |
" <td>0.651617</td>\n", | |
" <td>0.645220</td>\n", | |
" <td>0.905100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>19</td>\n", | |
" <td>0.267100</td>\n", | |
" <td>0.336935</td>\n", | |
" <td>0.637556</td>\n", | |
" <td>0.652532</td>\n", | |
" <td>0.644957</td>\n", | |
" <td>0.904975</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>20</td>\n", | |
" <td>0.267100</td>\n", | |
" <td>0.336756</td>\n", | |
" <td>0.637094</td>\n", | |
" <td>0.652837</td>\n", | |
" <td>0.644870</td>\n", | |
" <td>0.905017</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table><p>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"TrainOutput(global_step=4820, training_loss=0.35207317083208395, metrics={'train_runtime': 111.6227, 'train_samples_per_second': 689.286, 'train_steps_per_second': 43.181, 'total_flos': 71548216106580.0, 'train_loss': 0.35207317083208395, 'epoch': 20.0})" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 43 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "CKASz-2vIrJi" | |
}, | |
"source": [ | |
"The `evaluate` method allows you to evaluate again on the evaluation dataset or on another dataset:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "UOUcBkX8IrJi", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 188 | |
}, | |
"outputId": "c5772a91-7302-4f14-da69-c99eb281dcd1" | |
}, | |
"source": [ | |
"trainer.evaluate()" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" \n", | |
" <progress value='61' max='61' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" [61/61 00:00]\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'epoch': 20.0,\n", | |
" 'eval_accuracy': 0.9050170279923582,\n", | |
" 'eval_f1': 0.6448696700316409,\n", | |
" 'eval_loss': 0.3367559015750885,\n", | |
" 'eval_precision': 0.6370943733253944,\n", | |
" 'eval_recall': 0.652837095790116,\n", | |
" 'eval_runtime': 1.1185,\n", | |
" 'eval_samples_per_second': 860.049,\n", | |
" 'eval_steps_per_second': 54.535}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 44 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "BaMhVjZ-sJBO" | |
}, | |
"source": [ | |
"To get the precision/recall/f1 computed for each category now that we have finished training, we can apply the same function as before on the result of the `predict` method:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "wm8MsZ3tsJBO", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 509 | |
}, | |
"outputId": "6fdbd471-7987-47c3-cbd0-83a82bd45ce3" | |
}, | |
"source": [ | |
"predictions, labels, _ = trainer.predict(tokenized_datasets[\"test\"])\n", | |
"predictions = np.argmax(predictions, axis=2)\n", | |
"\n", | |
"# Remove ignored index (special tokens)\n", | |
"true_predictions = [\n", | |
" [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", | |
" for prediction, label in zip(predictions, labels)\n", | |
"]\n", | |
"true_labels = [\n", | |
" [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n", | |
" for prediction, label in zip(predictions, labels)\n", | |
"]\n", | |
"\n", | |
"results = metric.compute(predictions=true_predictions, references=true_labels)\n", | |
"results" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" \n", | |
" <progress value='122' max='61' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" [61/61 00:08]\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", | |
" _warn_prf(average, modifier, msg_start, len(result))\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'ADR': {'f1': 0.30279898218829515,\n", | |
" 'number': 446,\n", | |
" 'precision': 0.35,\n", | |
" 'recall': 0.26681614349775784},\n", | |
" 'DI': {'f1': 0.493963782696177,\n", | |
" 'number': 821,\n", | |
" 'precision': 0.4207369323050557,\n", | |
" 'recall': 0.5980511571254568},\n", | |
" 'Drugclass': {'f1': 0.7868852459016393,\n", | |
" 'number': 336,\n", | |
" 'precision': 0.7880597014925373,\n", | |
" 'recall': 0.7857142857142857},\n", | |
" 'Drugform': {'f1': 0.7922794117647058,\n", | |
" 'number': 565,\n", | |
" 'precision': 0.8240917782026769,\n", | |
" 'recall': 0.7628318584070797},\n", | |
" 'Drugname': {'f1': 0.8734309623430963,\n", | |
" 'number': 918,\n", | |
" 'precision': 0.8400402414486922,\n", | |
" 'recall': 0.9095860566448801},\n", | |
" 'Finding': {'f1': 0.0, 'number': 192, 'precision': 0.0, 'recall': 0.0},\n", | |
" 'overall_accuracy': 0.9050170279923582,\n", | |
" 'overall_f1': 0.6448696700316409,\n", | |
" 'overall_precision': 0.6370943733253944,\n", | |
" 'overall_recall': 0.652837095790116}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 45 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nI18Xeda7X8a" | |
}, | |
"source": [ | |
"from sklearn.metrics import confusion_matrix\n", | |
"import pandas as pd" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 435 | |
}, | |
"id": "Yz9BkfrO7bg6", | |
"outputId": "d6ce002d-0803-4320-8711-ec33bdc9c40d" | |
}, | |
"source": [ | |
"cm = pd.DataFrame(\n", | |
" confusion_matrix(sum(true_labels, []), sum(true_predictions, []), labels=label_list),\n", | |
" index=label_list,\n", | |
" columns=label_list\n", | |
")\n", | |
"cm" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>O</th>\n", | |
" <th>B-ADR</th>\n", | |
" <th>B-DI</th>\n", | |
" <th>B-Drugclass</th>\n", | |
" <th>B-Drugform</th>\n", | |
" <th>B-Drugname</th>\n", | |
" <th>B-Finding</th>\n", | |
" <th>I-ADR</th>\n", | |
" <th>I-DI</th>\n", | |
" <th>I-Drugclass</th>\n", | |
" <th>I-Drugform</th>\n", | |
" <th>I-Drugname</th>\n", | |
" <th>I-Finding</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>O</th>\n", | |
" <td>19494</td>\n", | |
" <td>29</td>\n", | |
" <td>175</td>\n", | |
" <td>35</td>\n", | |
" <td>60</td>\n", | |
" <td>71</td>\n", | |
" <td>0</td>\n", | |
" <td>20</td>\n", | |
" <td>26</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B-ADR</th>\n", | |
" <td>159</td>\n", | |
" <td>135</td>\n", | |
" <td>133</td>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B-DI</th>\n", | |
" <td>242</td>\n", | |
" <td>21</td>\n", | |
" <td>525</td>\n", | |
" <td>0</td>\n", | |
" <td>17</td>\n", | |
" <td>10</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B-Drugclass</th>\n", | |
" <td>50</td>\n", | |
" <td>1</td>\n", | |
" <td>17</td>\n", | |
" <td>264</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B-Drugform</th>\n", | |
" <td>98</td>\n", | |
" <td>4</td>\n", | |
" <td>11</td>\n", | |
" <td>1</td>\n", | |
" <td>432</td>\n", | |
" <td>17</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B-Drugname</th>\n", | |
" <td>44</td>\n", | |
" <td>1</td>\n", | |
" <td>16</td>\n", | |
" <td>1</td>\n", | |
" <td>8</td>\n", | |
" <td>848</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B-Finding</th>\n", | |
" <td>56</td>\n", | |
" <td>32</td>\n", | |
" <td>87</td>\n", | |
" <td>5</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>I-ADR</th>\n", | |
" <td>180</td>\n", | |
" <td>51</td>\n", | |
" <td>40</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>47</td>\n", | |
" <td>30</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>I-DI</th>\n", | |
" <td>236</td>\n", | |
" <td>17</td>\n", | |
" <td>102</td>\n", | |
" <td>10</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>11</td>\n", | |
" <td>46</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>I-Drugclass</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>I-Drugform</th>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>I-Drugname</th>\n", | |
" <td>19</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>39</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>I-Finding</th>\n", | |
" <td>25</td>\n", | |
" <td>7</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" O B-ADR B-DI ... I-Drugform I-Drugname I-Finding\n", | |
"O 19494 29 175 ... 0 0 0\n", | |
"B-ADR 159 135 133 ... 0 0 0\n", | |
"B-DI 242 21 525 ... 0 0 0\n", | |
"B-Drugclass 50 1 17 ... 0 0 0\n", | |
"B-Drugform 98 4 11 ... 0 0 0\n", | |
"B-Drugname 44 1 16 ... 0 0 0\n", | |
"B-Finding 56 32 87 ... 0 0 0\n", | |
"I-ADR 180 51 40 ... 0 0 0\n", | |
"I-DI 236 17 102 ... 0 0 0\n", | |
"I-Drugclass 0 0 0 ... 0 0 0\n", | |
"I-Drugform 3 0 0 ... 0 0 0\n", | |
"I-Drugname 19 0 0 ... 0 0 0\n", | |
"I-Finding 25 7 6 ... 0 0 0\n", | |
"\n", | |
"[13 rows x 13 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 47 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "cA0jWZwjVbI7", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "4161d7fe-c5e3-4f56-b9d4-52830e14da06" | |
}, | |
"source": [ | |
"model.save_pretrained('ner_bert.bin')\n", | |
"tokenizer.save_pretrained('ner_bert.bin')" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Configuration saved in ner_bert.bin/config.json\n", | |
"Model weights saved in ner_bert.bin/pytorch_model.bin\n", | |
"tokenizer config file saved in ner_bert.bin/tokenizer_config.json\n", | |
"Special tokens file saved in ner_bert.bin/special_tokens_map.json\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"('ner_bert.bin/tokenizer_config.json',\n", | |
" 'ner_bert.bin/special_tokens_map.json',\n", | |
" 'ner_bert.bin/vocab.txt',\n", | |
" 'ner_bert.bin/added_tokens.json',\n", | |
" 'ner_bert.bin/tokenizer.json')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 48 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "C5yv9hItsJBP" | |
}, | |
"source": [ | |
"# Применение модели" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "p0JHjRKmuv_m" | |
}, | |
"source": [ | |
"import torch" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"id": "Kp59uTtXZKT4", | |
"outputId": "6ed0bcf0-7de7-4b93-936b-0562ef0505b4" | |
}, | |
"source": [ | |
"# Demo sentence for inference: example 4 of ner_test.\n", | |
"text = ' '.join(ner_test[4]['tokens'])\n", | |
"text" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
}, | |
"text/plain": [ | |
"'Охотно применяю его при борьбе с насморком , что в моем случае явление очень частое .'" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 50 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "6h2hiUylZVmF" | |
}, | |
"source": [ | |
"import torch" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Yt8EXbDuuB1U", | |
"outputId": "bf2699a1-c42f-42bb-b00b-8adb7aca1edd" | |
}, | |
"source": [ | |
"tokens = tokenizer(text, return_tensors='pt')\n", | |
"tokens = {k: v.to(model.device) for k, v in tokens.items()}\n", | |
"\n", | |
"# no_grad() only disables gradient tracking; eval() is what switches off dropout,\n", | |
"# so call it explicitly to keep inference deterministic.\n", | |
"model.eval()\n", | |
"with torch.no_grad():\n", | |
"    pred = model(**tokens)\n", | |
"pred.logits.shape" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"torch.Size([1, 29, 13])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 52 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "2GQPlbnyuu6H", | |
"outputId": "e43272b6-3e22-44bb-ec7b-15a2acf570d3" | |
}, | |
"source": [ | |
"indices = pred.logits.argmax(dim=-1)[0].cpu().numpy()\n", | |
"token_text = tokenizer.convert_ids_to_tokens(tokens['input_ids'][0])\n", | |
"for t, idx in zip(token_text, indices):\n", | |
" print(f'{t:15s} {label_list[idx]:10s}')" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[CLS] O \n", | |
"О O \n", | |
"##хо O \n", | |
"##тно O \n", | |
"при O \n", | |
"##мен O \n", | |
"##я O \n", | |
"##ю O \n", | |
"его O \n", | |
"при O \n", | |
"борьбе O \n", | |
"с O \n", | |
"нас B-DI \n", | |
"##мор B-DI \n", | |
"##ком B-DI \n", | |
", O \n", | |
"что O \n", | |
"в O \n", | |
"м O \n", | |
"##ое O \n", | |
"##м O \n", | |
"случае O \n", | |
"я O \n", | |
"##вление O \n", | |
"очень O \n", | |
"часто O \n", | |
"##е O \n", | |
". O \n", | |
"[SEP] O \n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "tBSq9enuwJ_V" | |
}, | |
"source": [ | |
"Более простой способ применить модель — `pipeline` из библиотеки transformers от Hugging Face." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "lnrAoy6b8swA" | |
}, | |
"source": [ | |
"from transformers import pipeline" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "uowfISMu8v1k" | |
}, | |
"source": [ | |
"# Use the first GPU when CUDA is available, otherwise fall back to CPU (device=-1),\n", | |
"# so this cell does not fail on a CPU-only runtime.\n", | |
"pipe = pipeline(\n", | |
"    model=model,\n", | |
"    tokenizer=tokenizer,\n", | |
"    task='ner',\n", | |
"    aggregation_strategy='average',\n", | |
"    device=0 if torch.cuda.is_available() else -1,\n", | |
")" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "1WjXMXCv9Nde", | |
"outputId": "e2b4f6d6-6153-4bbd-915d-a3bc4d148895" | |
}, | |
"source": [ | |
"print(text)\n", | |
"print(pipe(text))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Охотно применяю его при борьбе с насморком , что в моем случае явление очень частое .\n", | |
"[{'entity_group': 'DI', 'score': 0.73669535, 'word': 'насморком', 'start': 33, 'end': 42}]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "panjTvbH9PJL" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment