-
-
Save ajesujoba/f8bd0d9df8c6c8b94925dac149315c26 to your computer and use it in GitHub Desktop.
create_rut5-base.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "create_rut5-base.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyO5k7Vc4zthTK1pkTfX5eNT", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"2d65b38f1ede49a0b4ae70b7e1f03359": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_aace2f95ba334bd48136c0caf5dca14c", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_745f0b9b29a0423d8759fb9f2c52cfad", | |
"IPY_MODEL_0c1bd6c7a83a479db23009b96ef9caf4" | |
] | |
} | |
}, | |
"aace2f95ba334bd48136c0caf5dca14c": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"745f0b9b29a0423d8759fb9f2c52cfad": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_ed1988f523184224bb1c8b6096ab31f1", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 4309802, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 4309802, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_7a16f7fd4b6d4b0e8eabfe1f126d79d1" | |
} | |
}, | |
"0c1bd6c7a83a479db23009b96ef9caf4": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_32c54722ab15498da4adc548eccf603f", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 4.31M/4.31M [01:19<00:00, 54.1kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_3a11705472864ea9a248b92c689f67e1" | |
} | |
}, | |
"ed1988f523184224bb1c8b6096ab31f1": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"7a16f7fd4b6d4b0e8eabfe1f126d79d1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"32c54722ab15498da4adc548eccf603f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"3a11705472864ea9a248b92c689f67e1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"5a1a3f0010324df4b4dd42289eb258f4": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_983350c43d3e428b8573c8a7f6ed43ec", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_2467e4daed9945e9bad79d0bcec4efd2", | |
"IPY_MODEL_1f8a7f5fe43b41db8fabc2ffad127423" | |
] | |
} | |
}, | |
"983350c43d3e428b8573c8a7f6ed43ec": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"2467e4daed9945e9bad79d0bcec4efd2": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_a6590375d193488abdbb9fe6bb46f026", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 65, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 65, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_1a61b2d84b654b18bfafaf139401dabd" | |
} | |
}, | |
"1f8a7f5fe43b41db8fabc2ffad127423": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_fa9c3c9b135a4440b5c0fcbce359b151", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 65.0/65.0 [00:00<00:00, 127B/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_ad57c3a7bb5542398cc481744bf6fe58" | |
} | |
}, | |
"a6590375d193488abdbb9fe6bb46f026": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"1a61b2d84b654b18bfafaf139401dabd": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"fa9c3c9b135a4440b5c0fcbce359b151": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"ad57c3a7bb5542398cc481744bf6fe58": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"5a3980a7d3434549aeb0276ff25dfc37": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_12935e9215b841dda2e40a1c3e497726", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_e342244246e24e858d5036b4eef040b8", | |
"IPY_MODEL_50651c9c71f849569d70d7fe037a2c2f" | |
] | |
} | |
}, | |
"12935e9215b841dda2e40a1c3e497726": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"e342244246e24e858d5036b4eef040b8": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_81201911fdff4840b391ab5cbc6c2874", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 376, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 376, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_73e3647a17ef45bb8080ab67c2e70120" | |
} | |
}, | |
"50651c9c71f849569d70d7fe037a2c2f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_531ee8dbd2354128b842d82db5ec85ac", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 376/376 [01:17<00:00, 4.82B/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_ae0027cb61cb4f41b3ee8ab127b3a7b7" | |
} | |
}, | |
"81201911fdff4840b391ab5cbc6c2874": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"73e3647a17ef45bb8080ab67c2e70120": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"531ee8dbd2354128b842d82db5ec85ac": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"ae0027cb61cb4f41b3ee8ab127b3a7b7": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"7bf6adb3c017459f85a2399ede31edb2": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_1fb43db4b3d74b25b2e0e7f3daa5f4c1", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_56ffd44ce9dd4cfb902b3e5d785985e5", | |
"IPY_MODEL_3146e7ba8c0d44aa8c07f31006b8dde5" | |
] | |
} | |
}, | |
"1fb43db4b3d74b25b2e0e7f3daa5f4c1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"56ffd44ce9dd4cfb902b3e5d785985e5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_737f1d75176f403d95d7e0ea18933d51", | |
"_dom_classes": [], | |
"description": "100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 1000000, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 1000000, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_faebe32278c04527abd995a4da588c13" | |
} | |
}, | |
"3146e7ba8c0d44aa8c07f31006b8dde5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_a630c1d2b88d4f9dbda38020f7eaa287", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 1000000/1000000 [08:19<00:00, 2003.52it/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_9181d9829eb24887b18f5e16364b6584" | |
} | |
}, | |
"737f1d75176f403d95d7e0ea18933d51": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"faebe32278c04527abd995a4da588c13": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"a630c1d2b88d4f9dbda38020f7eaa287": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"9181d9829eb24887b18f5e16364b6584": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"016ce230728a4da28a8992a571807576": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_2f45540c744a42129bf0a1254ed2c13d", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_7ec288558b8c43aba1abb9e5dd4612d3", | |
"IPY_MODEL_a59a755f6ccd48a0abdb35574713dbc9" | |
] | |
} | |
}, | |
"2f45540c744a42129bf0a1254ed2c13d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"7ec288558b8c43aba1abb9e5dd4612d3": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_029cc249595640dabca1cda08dd8611b", | |
"_dom_classes": [], | |
"description": "100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 1000000, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 1000000, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_9b41f256d17c4bdcaebf6dd5f42ec5f9" | |
} | |
}, | |
"a59a755f6ccd48a0abdb35574713dbc9": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_0baf1aa3115b453d9caa7160da9fc398", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 1000000/1000000 [03:50<00:00, 4340.07it/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_5c2b2e9f6dfc4771a0f8671db975efd6" | |
} | |
}, | |
"029cc249595640dabca1cda08dd8611b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"9b41f256d17c4bdcaebf6dd5f42ec5f9": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"0baf1aa3115b453d9caa7160da9fc398": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"5c2b2e9f6dfc4771a0f8671db975efd6": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"b0e9eaee892b4822985a6d0f20e41f07": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_c539d3b872284fb799b03132fac6dc14", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_c1c374868d4941929a85c2a00f2f2860", | |
"IPY_MODEL_a1089e2c170549539cb9844e6e4e5472" | |
] | |
} | |
}, | |
"c539d3b872284fb799b03132fac6dc14": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"c1c374868d4941929a85c2a00f2f2860": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_cac56c0a1b6c4d16ba479c3b7301961b", | |
"_dom_classes": [], | |
"description": "100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 1000000, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 1000000, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_f75eb6fb8b5543eb812f5731d83009c9" | |
} | |
}, | |
"a1089e2c170549539cb9844e6e4e5472": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_0f0672563dd54404affe7ce0ef899ce1", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 1000000/1000000 [03:41<00:00, 4514.84it/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_8ef3b35ab56144c69768152099b623fe" | |
} | |
}, | |
"cac56c0a1b6c4d16ba479c3b7301961b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"f75eb6fb8b5543eb812f5731d83009c9": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"0f0672563dd54404affe7ce0ef899ce1": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"8ef3b35ab56144c69768152099b623fe": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"b432b120350b42388447dcfcf959d673": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_5660d17e6c0e40b7bee6dc3d1af46f34", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_9dcf8c4bef5342aba837077c4904d852", | |
"IPY_MODEL_d9a80cc59ada42fe908726abd05942bf" | |
] | |
} | |
}, | |
"5660d17e6c0e40b7bee6dc3d1af46f34": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"9dcf8c4bef5342aba837077c4904d852": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_847d7a0c9f4a418e93484cd9bf8b8e0a", | |
"_dom_classes": [], | |
"description": "100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 220100, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 220100, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_6c74fe4ffc4f4013bf73d00212e5775a" | |
} | |
}, | |
"d9a80cc59ada42fe908726abd05942bf": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_5a17271685624ad3bd2aa9c88504f969", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 220100/220100 [01:05<00:00, 3338.02it/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_1780d72d89b5449eaa0dcea9c595b6d7" | |
} | |
}, | |
"847d7a0c9f4a418e93484cd9bf8b8e0a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"6c74fe4ffc4f4013bf73d00212e5775a": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"5a17271685624ad3bd2aa9c88504f969": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"1780d72d89b5449eaa0dcea9c595b6d7": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/avidale/44cd35bfcdaf8bedf51d97c468cc8001/create_rut5-base.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "oh2xfITDhN2u" | |
}, | |
"source": [ | |
"The goal of this notebook is to create a Russian version of the mT5 model out of the multilingual one. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "BoiF06nfGvtW" | |
}, | |
"source": [ | |
"!pip install transformers sentencepiece" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "zcVexj3Ye6X3" | |
}, | |
"source": [ | |
"# Removing the unused vocabulary" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "X99M7UWoHC9k" | |
}, | |
"source": [ | |
"from transformers import T5ForConditionalGeneration, T5Tokenizer\n", | |
"import torch" | |
], | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 201, | |
"referenced_widgets": [ | |
"2d65b38f1ede49a0b4ae70b7e1f03359", | |
"aace2f95ba334bd48136c0caf5dca14c", | |
"745f0b9b29a0423d8759fb9f2c52cfad", | |
"0c1bd6c7a83a479db23009b96ef9caf4", | |
"ed1988f523184224bb1c8b6096ab31f1", | |
"7a16f7fd4b6d4b0e8eabfe1f126d79d1", | |
"32c54722ab15498da4adc548eccf603f", | |
"3a11705472864ea9a248b92c689f67e1", | |
"5a1a3f0010324df4b4dd42289eb258f4", | |
"983350c43d3e428b8573c8a7f6ed43ec", | |
"2467e4daed9945e9bad79d0bcec4efd2", | |
"1f8a7f5fe43b41db8fabc2ffad127423", | |
"a6590375d193488abdbb9fe6bb46f026", | |
"1a61b2d84b654b18bfafaf139401dabd", | |
"fa9c3c9b135a4440b5c0fcbce359b151", | |
"ad57c3a7bb5542398cc481744bf6fe58", | |
"5a3980a7d3434549aeb0276ff25dfc37", | |
"12935e9215b841dda2e40a1c3e497726", | |
"e342244246e24e858d5036b4eef040b8", | |
"50651c9c71f849569d70d7fe037a2c2f", | |
"81201911fdff4840b391ab5cbc6c2874", | |
"73e3647a17ef45bb8080ab67c2e70120", | |
"531ee8dbd2354128b842d82db5ec85ac", | |
"ae0027cb61cb4f41b3ee8ab127b3a7b7" | |
] | |
}, | |
"id": "7OnBRq8pHFDN", | |
"outputId": "c078cc6d-01b2-47f8-aa2f-ad34b9eb4c1b" | |
}, | |
"source": [ | |
"tokenizer = T5Tokenizer.from_pretrained(\"google/mt5-base\")\n", | |
"tokenizer" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "2d65b38f1ede49a0b4ae70b7e1f03359", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=4309802.0, style=ProgressStyle(descript…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "5a1a3f0010324df4b4dd42289eb258f4", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=65.0, style=ProgressStyle(description_w…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "5a3980a7d3434549aeb0276ff25dfc37", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=376.0, style=ProgressStyle(description_…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"PreTrainedTokenizer(name_or_path='google/mt5-base', vocab_size=250100, model_max_len=1000000000000000019884624838656, is_fast=False, padding_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>'})" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "HkXHkM6OHJcH", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "e3a09b2d-82eb-4cfa-a6bb-0d37466ff821" | |
}, | |
"source": [ | |
"model = T5ForConditionalGeneration.from_pretrained('google/mt5-base')" | |
], | |
"execution_count": 37, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"You are using a model of type mt5 to instantiate a model of type t5. This is not supported for all configurations of models and can yield errors.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "YMItls1shI3-" | |
}, | |
"source": [ | |
"Our tokenizer contains 250K tokens. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "U0vhvaP8HKm8", | |
"outputId": "d48a92ad-0358-47bf-a5a0-583af98f08c6" | |
}, | |
"source": [ | |
"print(tokenizer.vocab_size)" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"250100\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "hX8pzm4nhhMt" | |
}, | |
"source": [ | |
"The model has 582M parameters. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "hz6Bv4tZIsX5", | |
"outputId": "e40c6a94-c9d2-4839-b1dc-b386056dc1a0" | |
}, | |
"source": [ | |
"def msize(m):\n", | |
" return sum(p.numel() for p in m.parameters())\n", | |
"\n", | |
"original_size = msize(model)\n", | |
"print(msize(model))\n", | |
"print(msize(model.shared))\n", | |
"print('encoder')\n", | |
"print(msize(model.encoder))\n", | |
"print(msize(model.encoder.block))\n", | |
"print('decoder')\n", | |
"print(msize(model.decoder))\n", | |
"print(msize(model.decoder.block))\n", | |
"print(msize(model.lm_head))" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"582401280\n", | |
"192086016\n", | |
"encoder\n", | |
"277040256\n", | |
"84953472\n", | |
"decoder\n", | |
"305361024\n", | |
"113274240\n", | |
"192086016\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "18ckhebWLLra" | |
}, | |
"source": [ | |
"Input and output embeddings together account for 66% of the whole model (about 33% each)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "hmvmyYsyHh2s", | |
"outputId": "aa72c8fb-e202-4460-efe9-fd28a821f1e2" | |
}, | |
"source": [ | |
"print(msize(model.shared) / msize(model))\n", | |
"print(msize(model.lm_head) / msize(model))" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"0.32981729710484153\n", | |
"0.32981729710484153\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "amFXHV9OL9SU" | |
}, | |
"source": [ | |
"# Determine the new tokens" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "NfeGCTv5Vvmu" | |
}, | |
"source": [ | |
"Take a file from https://wortschatz.uni-leipzig.de/en/download/Russian as a representation of the Russian language. It contains 1M sentences. \n", | |
"\n", | |
"Also take a similar representation of English, because we want our model to be bilingual, and English shares few tokens with Russian." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "WxsNhpKfME5W" | |
}, | |
"source": [ | |
"!wget http://pcai056.informatik.uni-leipzig.de/downloads/corpora/rus-ru_web-public_2019_1M.tar.gz\n", | |
"!tar -xsvf rus-ru_web-public_2019_1M.tar.gz" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "XNHwPMCHiRhr" | |
}, | |
"source": [ | |
"!wget http://pcai056.informatik.uni-leipzig.de/downloads/corpora/eng-com_web-public_2018_1M.tar.gz\n", | |
"!tar -xsvf eng-com_web-public_2018_1M.tar.gz" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "gqjTHFJIiZTk" | |
}, | |
"source": [ | |
"Let us look at the sentences" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"id": "IoJlXMw_M7pT", | |
"outputId": "5d1ca6d0-4153-4290-db8e-5e0ec29e6f80" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"pd.options.display.max_colwidth = 300\n", | |
"import csv\n", | |
"fname = 'rus-ru_web-public_2019_1M/rus-ru_web-public_2019_1M-sentences.txt'\n", | |
"df_ru = pd.read_csv(fname, sep='\\t', header=None, quoting=csv.QUOTE_NONE)\n", | |
"df_ru.columns = ['idx', 'text']\n", | |
"df_ru.sample(5)" | |
], | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>idx</th>\n", | |
" <th>text</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>53482</th>\n", | |
" <td>53483</td>\n", | |
" <td>Больше Лена ничего говорить не стала, не до этого было.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>937136</th>\n", | |
" <td>937137</td>\n", | |
" <td>Чиновники наши не беднеют, а при наших доходах не разбогатеешь точно».</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>401463</th>\n", | |
" <td>401464</td>\n", | |
" <td>Кроме обязательной почты, сберкассы и трех магазинов РайПО, здесь функционируют объекты социальной инфраструктуры.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>238656</th>\n", | |
" <td>238657</td>\n", | |
" <td>Доставка по России и ближнему зарубежью.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>295958</th>\n", | |
" <td>295959</td>\n", | |
" <td>Здесь раскинулась долина, в центре которой течет поток зеленовато‑голубого цвета шириной в несколько десятков метров, светящийся в темноте, как большой освещаемый изнутри бассейн.</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" idx text\n", | |
"53482 53483 Больше Лена ничего говорить не стала, не до этого было.\n", | |
"937136 937137 Чиновники наши не беднеют, а при наших доходах не разбогатеешь точно».\n", | |
"401463 401464 Кроме обязательной почты, сберкассы и трех магазинов РайПО, здесь функционируют объекты социальной инфраструктуры.\n", | |
"238656 238657 Доставка по России и ближнему зарубежью.\n", | |
"295958 295959 Здесь раскинулась долина, в центре которой течет поток зеленовато‑голубого цвета шириной в несколько десятков метров, светящийся в темноте, как большой освещаемый изнутри бассейн." | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 18 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"id": "V-Uc7nbziyXp", | |
"outputId": "c14d04a8-5b09-4977-aa10-9ed34a2ebfba" | |
}, | |
"source": [ | |
"fname = 'eng-com_web-public_2018_1M/eng-com_web-public_2018_1M-sentences.txt'\n", | |
"df_en = pd.read_csv(fname, sep='\\t', header=None, quoting=csv.QUOTE_NONE)\n", | |
"df_en.columns = ['idx', 'text']\n", | |
"df_en.sample(5)" | |
], | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>idx</th>\n", | |
" <th>text</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>536627</th>\n", | |
" <td>536628</td>\n", | |
" <td>My two crabby old men cats were not so impressed, but Miss Agnes DeMitten (aka Endora) was checking behind the monitor.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>783178</th>\n", | |
" <td>783179</td>\n", | |
" <td>There is another lightweight distribution, in the Ubuntu family called Xubuntu, but Lubuntu is far more efficient when it comes to memory usage.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>447801</th>\n", | |
" <td>447802</td>\n", | |
" <td>It's perfectly symmetric -- client to server, WinFS to Sharepoint.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>207171</th>\n", | |
" <td>207172</td>\n", | |
" <td>\"Everything we asked for we got from Judge Lasnik,\" he said, and called on President Trump to make it \"unlawful for anyone to make this information available for anyone\".</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>595030</th>\n", | |
" <td>595031</td>\n", | |
" <td>\"People who are willing to do something like this, especially if the shark hasn't made it, are brought to justice,\" Spellman said.</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" idx text\n", | |
"536627 536628 My two crabby old men cats were not so impressed, but Miss Agnes DeMitten (aka Endora) was checking behind the monitor.\n", | |
"783178 783179 There is another lightweight distribution, in the Ubuntu family called Xubuntu, but Lubuntu is far more efficient when it comes to memory usage.\n", | |
"447801 447802 It's perfectly symmetric -- client to server, WinFS to Sharepoint.\n", | |
"207171 207172 \"Everything we asked for we got from Judge Lasnik,\" he said, and called on President Trump to make it \"unlawful for anyone to make this information available for anyone\".\n", | |
"595030 595031 \"People who are willing to do something like this, especially if the shark hasn't made it, are brought to justice,\" Spellman said." | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 19 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "zhkWqfdNjNww" | |
}, | |
"source": [ | |
"Count the tokens that the current model uses for representing the sentences. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 115, | |
"referenced_widgets": [ | |
"7bf6adb3c017459f85a2399ede31edb2", | |
"1fb43db4b3d74b25b2e0e7f3daa5f4c1", | |
"56ffd44ce9dd4cfb902b3e5d785985e5", | |
"3146e7ba8c0d44aa8c07f31006b8dde5", | |
"737f1d75176f403d95d7e0ea18933d51", | |
"faebe32278c04527abd995a4da588c13", | |
"a630c1d2b88d4f9dbda38020f7eaa287", | |
"9181d9829eb24887b18f5e16364b6584", | |
"016ce230728a4da28a8992a571807576", | |
"2f45540c744a42129bf0a1254ed2c13d", | |
"7ec288558b8c43aba1abb9e5dd4612d3", | |
"a59a755f6ccd48a0abdb35574713dbc9", | |
"029cc249595640dabca1cda08dd8611b", | |
"9b41f256d17c4bdcaebf6dd5f42ec5f9", | |
"0baf1aa3115b453d9caa7160da9fc398", | |
"5c2b2e9f6dfc4771a0f8671db975efd6" | |
] | |
}, | |
"id": "lmzSON9iM_yb", | |
"outputId": "37d26a05-0566-444d-b6da-37506f197ea7" | |
}, | |
"source": [ | |
"from collections import Counter\n", | |
"from tqdm.auto import tqdm, trange\n", | |
"\n", | |
"cnt_ru = Counter()\n", | |
"for text in tqdm(df_ru.text):\n", | |
" cnt_ru.update(tokenizer.encode(text))\n", | |
"\n", | |
"cnt_en = Counter()\n", | |
"for text in tqdm(df_en.text):\n", | |
" cnt_en.update(tokenizer.encode(text))" | |
], | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "7bf6adb3c017459f85a2399ede31edb2", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=1000000.0), HTML(value='')))" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "016ce230728a4da28a8992a571807576", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=1000000.0), HTML(value='')))" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 66, | |
"referenced_widgets": [ | |
"b0e9eaee892b4822985a6d0f20e41f07", | |
"c539d3b872284fb799b03132fac6dc14", | |
"c1c374868d4941929a85c2a00f2f2860", | |
"a1089e2c170549539cb9844e6e4e5472", | |
"cac56c0a1b6c4d16ba479c3b7301961b", | |
"f75eb6fb8b5543eb812f5731d83009c9", | |
"0f0672563dd54404affe7ce0ef899ce1", | |
"8ef3b35ab56144c69768152099b623fe" | |
] | |
}, | |
"id": "l8UdvvAYlJ6_", | |
"outputId": "b3dd44a3-12c2-4b5d-d247-9e8d341d766d" | |
}, | |
"source": [ | |
"cnt_en = Counter()\n", | |
"for text in tqdm(df_en.text):\n", | |
" cnt_en.update(tokenizer.encode(text))" | |
], | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "b0e9eaee892b4822985a6d0f20e41f07", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=1000000.0), HTML(value='')))" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "sTzND5F1OkEY" | |
}, | |
"source": [ | |
"The tokens that are ever used in Russian texts make up 23% of the whole vocabulary. For English, it is 27%.\n", | |
"\n", | |
"Surprisingly, there is more than 50% overlap between the vocabularies. Perhaps this is because Russian texts occasionally contain English words or other words written in the Latin alphabet. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "M07fj3z0NWiy", | |
"outputId": "b335c276-1ab1-4465-cbb8-8a9fa5d08cff" | |
}, | |
"source": [ | |
"print(len(cnt_ru), len(cnt_ru)/tokenizer.vocab_size)\n", | |
"print(len(cnt_en), len(cnt_en)/tokenizer.vocab_size)\n", | |
"common = len(set(cnt_ru.keys()).intersection(set(cnt_en.keys())))\n", | |
"print(common, common / len(cnt_ru))" | |
], | |
"execution_count": 58, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"58438 0.23365853658536587\n", | |
"67920 0.2715713714514194\n", | |
"33211 0.5683117149799788\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "2ULUmyllmNA0" | |
}, | |
"source": [ | |
"For both English and Russian, the 10K most frequent tokens cover over 95% of all token occurrences in the texts, and the top 20K cover over 98%. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "kNudkAe5NbKT", | |
"outputId": "de2363c8-acec-4048-fc6a-5041e675ab13" | |
}, | |
"source": [ | |
"print('ru')\n", | |
"for top in 10_000, 20_000, 30_000:\n", | |
" print(top, sum(v for k, v in cnt_ru.most_common(top)) / sum(cnt_ru.values()))\n", | |
"print('en')\n", | |
"for top in 10_000, 20_000, 30_000:\n", | |
" print(top, sum(v for k, v in cnt_en.most_common(top)) / sum(cnt_en.values()))" | |
], | |
"execution_count": 25, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"ru\n", | |
"10000 0.9645064095240437\n", | |
"20000 0.9948845835370821\n", | |
"30000 0.9982199641222749\n", | |
"en\n", | |
"10000 0.9531899764307693\n", | |
"20000 0.9840809828270257\n", | |
"30000 0.9937869259525808\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "0N_D37J3lbqr" | |
}, | |
"source": [ | |
"Remember the old vocabulary, because we are going to replace it soon!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "9RzGibfZQbgP" | |
}, | |
"source": [ | |
"old_voc = tokenizer.get_vocab()\n", | |
"old_inv_voc = {v: k for k, v in old_voc.items()}" | |
], | |
"execution_count": 27, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "rKwEQtbRljiC" | |
}, | |
"source": [ | |
"Look at the most used tokens. They are mostly function words, punctuation marks, or affixes." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Y8oL4rL8QZ8f", | |
"outputId": "a8c56a5f-d5ba-4da1-80fe-4038d86f9efe" | |
}, | |
"source": [ | |
"print(tokenizer.convert_ids_to_tokens([k for k, v in cnt_ru.most_common(30)]))\n", | |
"print(tokenizer.convert_ids_to_tokens([k for k, v in cnt_en.most_common(30)]))" | |
], | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"['▁', ',', '</s>', '.', 'и', '▁в', 'а', 'е', '▁не', '▁на', '▁с', 'я', '-', 'ы', '▁по', '▁что', 'у', 'о', 'ом', 'ов', 'ой', '▁за', '▁от', '▁это', '▁В', 'й', '▁у', '▁как', 'ть', '▁«']\n", | |
"['▁', '</s>', '.', '▁the', ',', 's', '▁to', '▁and', 'a', '▁of', '▁in', '▁is', '▁I', '’', '▁that', 'ed', '▁for', '-', 'ing', \"'\", '▁you', '▁it', '▁with', '▁on', 'ly', 'y', '▁be', '▁The', '▁as', '▁are']\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "AwwPWiO3Po1x" | |
}, | |
"source": [ | |
"We try the following composition of vocabulary:\n", | |
"* 1K of top tokens of the original tokenizer (just in case)\n", | |
"* Top 10K of the English vocabulary\n", | |
"* Top 20K of the Russian vocabulary (or more, to make the total number of tokens 30K)\n", | |
"* 100 special tokens that T5 uses\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "J-aSMIB1Pxvh", | |
"outputId": "ee53265d-6192-40df-e98c-67b3ca11b285" | |
}, | |
"source": [ | |
"new_tokens = set(range(1000))\n", | |
"for i, (k, v) in enumerate(cnt_en.most_common(10_000)):\n", | |
" if k not in new_tokens:\n", | |
" new_tokens.add(k)\n", | |
"for i, (k, v) in enumerate(cnt_ru.most_common(25_000)):\n", | |
" if len(new_tokens) == 29_900:\n", | |
" print(i, 'Russan tokens are included')\n", | |
" break\n", | |
" if k not in new_tokens:\n", | |
" new_tokens.add(k)\n", | |
"\n", | |
"for t in range(tokenizer.vocab_size - 100, tokenizer.vocab_size):\n", | |
" new_tokens.add(t)\n", | |
"\n", | |
"print(len(new_tokens))\n", | |
"kept_ids = sorted(new_tokens)" | |
], | |
"execution_count": 39, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"20843 Russan tokens are included\n", | |
"30000\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "BLAFLhrDoD4U" | |
}, | |
"source": [ | |
"The new vocabulary is only 12% of the original one. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "q21bC7tpTyuW", | |
"outputId": "bed03ca4-c652-4d9b-cb4e-d2caceecc51e" | |
}, | |
"source": [ | |
"len(kept_ids) / tokenizer.vocab_size" | |
], | |
"execution_count": 40, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.11995201919232307" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 40 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "s9ZrtTdcRfN_" | |
}, | |
"source": [ | |
"The plot shows that tokens that were more frequent in the original vocabulary are more likely to be included in the new vocabulary, which is why the curve bends upward. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 279 | |
}, | |
"id": "IAPmeDZmRDIf", | |
"outputId": "c5b4526f-cbd5-447c-f372-6869ce6d5324" | |
}, | |
"source": [ | |
"import matplotlib.pyplot as plt\n", | |
"plt.plot(kept_ids)\n", | |
"plt.xlabel('new id of token')\n", | |
"plt.ylabel('old id of token');" | |
], | |
"execution_count": 42, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEGCAYAAABYV4NmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dd3yV5d3H8c+PQNh7E8Age1QQw3LUVRXUinWPR1FR63zsVmur1rbPY2urrQNbBwoOcPKAVkVUcLLC3hI2EQgkEHYgye/541zRUxpCgJycnOT7fr3O69znutfvygn5cV/XdV+3uTsiIiJlrVq8AxARkcpJCUZERGJCCUZERGJCCUZERGJCCUZERGKierwDqCiaNWvmqamp8Q5DRCShzJo1a4u7Ny9unRJMkJqaSnp6erzDEBFJKGa25mDr1EQmIiIxoQQjIiIxoQQjIiIxoQQjIiIxoQQjIiIxEbMEY2btzGyymS02s0Vmdlcof9DMMs1sbnidG7XPvWaWYWbLzOycqPLBoSzDzO6JKu9gZtND+WtmlhzKa4bPGWF9aqzqKSIixYvlFUw+8HN37wEMBG43sx5h3WPu3ie83gMI664AegKDgRFmlmRmScBTwBCgB3Bl1HH+FI7VCdgKDA/lw4GtofyxsJ2IiJSjmCUYd9/g7rPD8g5gCZBSwi5DgbHunufuq4AMoH94Zbj7SnffB4wFhpqZAWcAb4b9RwEXRh1rVFh+EzgzbC8iIsG4Oet59MNlrMneFZPjl0sfTGiiOh6YHoruMLP5ZjbSzBqHshRgXdRu60PZwcqbAtvcPf+A8n87VlifG7Y/MK6bzSzdzNI3b958VHUUEUkkG3L38NPX5vH4Jxms3JKgCcbM6gFvAT9x9+3A00BHoA+wAfhrrGM4GHd/xt3T3D2tefNiZzoQEamU8vYXAvCXS3tzetcWMTlHTBOMmdUgklxecfe3Adx9k7sXuHsh8CyRJjCATKBd1O5tQ9nByrOBRmZW/YDyfztWWN8wbC8iIkBBeJpxjaTY9R7EchSZAc8DS9z90ajy1lGb/QhYGJYnAFeEEWAdgM7ADGAm0DmMGEsmMhBggkee9TwZuCTsPwwYH3WsYWH5EuAT17OhRUS+VVgY+ZOYVC12CSaWk12eBFwDLDCzuaHs10RGgfUBHFgN/BjA3ReZ2evAYiIj0G539wIAM7sDmAgkASPdfVE43t3AWDP7AzCHSEIjvL9kZhlADpGkJCIiQX5Rgonh+KeYJRh3/wIoLvL3Stjnj8Afiyl/r7j93H0l3zWxRZfvBS49nHhFRKqSgpBgqsXwCkZ38ouIVEGFHvsrGCUYEZEq6NsmskTs5BcRkYqroBz6YJRgRESqoC8ztgDQplGtmJ1DCUZEpIpZsXknI6as4LzvtaZTi/oxO48SjIhIFeLuPPTOYmpWr8YDF/Q49A5HQQlGRKQKeXf+Bj79ejN3ndmZFvVj1zwGSjAiIlVG1va9/Hb8Qnq3bch1J6bG/HxKMCIiVYC7c+/bC9izr4C/XtaH6kmx//OvBCMiUgVMXLSJj5dm8avB3ejUol65nFMJRkSkkttfUMgjE5dybLO65dI0VkQJRkSkkvvDu4tZsXkX953XPaazJx9ICUZEpBJ7PX0do6auYfjJHTize8tyPbcSjIhIJTVrTQ6/GbeQkzs149fndi/38yvBiIhUQrl79nPHq3No1bAWT151fLk2jRWJ5QPHREQkTv78wVKyduQx7rYTaVQnOS4x6ApGRKSS+WL5FsbMWMs1A4/huLaN4haHEoyISCUybWU2N4yaybHN6/GLc7rGNRYlGBGRSiJ7Zx53jZ1D20a1ee3mgdSrGd9eEPXBiIhUAgWFzs/fmMfWXfsZeV0/mtarGe+QdAUjIlIZPDppGVOWbea3P+xBzzYN4x0OoAQjIpLwJi/LYsSUFVye1o5rBh4T73C+pQQjIpLA0lfncO
vLs+jWqkHMHyB2uJRgREQS1MLMXK5/cSZtGtbmpeH9qZNcsbrVlWBERBLQjr37ue2V2dSvWZ2XbhxAswrQqX+gipXuRETkkAoLnV+8MY/MbXt47eaBpDSqHe+QiqUrGBGRBPPcFyuZuGgT9w7pRlpqk3iHc1BKMCIiCWTm6hwembiMc3q2ZPjJHeIdTomUYEREEsTmHXnc/sps2jauw58uPg6z8p8h+XAowYiIJICCQueusXPI3bOfEVf3jdsMyYdDnfwiIhWcu/PQO4v4akU2f77kOLq3bhDvkEolZlcwZtbOzCab2WIzW2Rmd4XyJmY2ycyWh/fGodzM7HEzyzCz+WbWN+pYw8L2y81sWFT5CWa2IOzzuIXrxYOdQ0QkET3/xSpGTV3DTad04LK0dvEOp9Ri2USWD/zc3XsAA4HbzawHcA/wsbt3Bj4OnwGGAJ3D62bgaYgkC+ABYADQH3ggKmE8DdwUtd/gUH6wc4iIJJRnP1vJH/61hME9W3HPkPJ/7PHRiFmCcfcN7j47LO8AlgApwFBgVNhsFHBhWB4KjPaIaUAjM2sNnANMcvccd98KTAIGh3UN3H2auzsw+oBjFXcOEZGEMXrqav743hLO+15rnojTY4+PRrl08ptZKnA8MB1o6e4bwqqNQMuwnAKsi9ptfSgrqXx9MeWUcI4D47rZzNLNLH3z5s2HXzERkRh5adoa7h+/iB90b8Fjl/ehRlLijcmKecRmVg94C/iJu2+PXheuPDyW5y/pHO7+jLunuXta8+bNYxmGiEipzViVw+/fWczpXZvz9H+dQHL1xEsuEOMEY2Y1iCSXV9z97VC8KTRvEd6zQnkmEN171TaUlVTetpjyks4hIlKhrcnexU2j02nbuDZ/vSwxr1yKxHIUmQHPA0vc/dGoVROAopFgw4DxUeXXhtFkA4Hc0Mw1ETjbzBqHzv2zgYlh3XYzGxjOde0BxyruHCIiFdbOvHxuGp2OGbxwfT+a1K3497qUJJb3wZwEXAMsMLO5oezXwMPA62Y2HFgDXBbWvQecC2QAu4HrAdw9x8x+D8wM2z3k7jlh+TbgRaA28H54UcI5REQqpH35hdw1Zg4rNu9i9A39OaZp3XiHdNQs0kUhaWlpnp6eHu8wRKQKcnd+8tpcxs/9ht8P7ck1g1LjHVKpmdksd08rbl3iNu6JiFQST3+6gvFzv+GuMzsnVHI5FCUYEZE4+mDhRv78wTIu6N2G/z6zc7zDKVNKMCIicZK+OoefvjaX3u0a8edLjku4GykPRQlGRCQO5q/fxvUvzKR1w1o8e+0J1KqRFO+QypwSjIhIOVuyYTvXPD+DRnVr8MpNA2hRv1a8Q4oJJRgRkXKUkbWT/3puOnWSk3j1xoG0blg73iHFjBKMiEg5WZO9i6ufm4aZ8cqNA2jXpE68Q4opJRgRkXKQuW0Pl/9zGvvyC3nlxgEc27xevEOKOT3RUkQkxtZv3c31L8xkZ14+r/94EF1b1Y93SOVCCUZEJIZWbt7J5c9MI29/Ac8NS6NHm8R43HFZUIIREYmRhZm5XDtyBgBv3noiXVpWjSuXIuqDERGJgRmrcrjymWnUrpHEW1UwuYCuYEREytyUZVnc8vIs2jSqzUvDB5DSqPIORS6JEoyISBl6PX0d941bQJeW9Rl9Q3+a1qsZ75DiRglGRKSMPDJxKU9NXsEpnZvx1NV9aVCrRrxDiislGBGRo1RQ6Dz0ziJGTV3DFf3a8dDQXiRXVxe3EoyIyFHYs6+An70+l/cXbuSmUzpwz5DulW5W5COlBCMicoQ2bd/LLS/PYu66bfzmvO7ceMqx8Q6pQlGCERE5Ais27+Ta52eQs2sfI67qy5DvtY53SBWOEoyIyGGau24bw1+cCcAbtwyiV0rDOEdUMZUqwZjZiUBq9PbuPjpGMYmIVFiz1mzlv56bTrP6yYy6vn+VmLTySB0ywZjZS0BHYC5QEIodUIIRkSrl0683c9vLs2jZoCav3zKo0j
4orKyU5gomDejh7h7rYEREKqoPFm7kzjGz6dSiPi9c10/JpRRKM1B7IdAq1oGIiFRU4+as5/ZXZ9MrpSFjbx5Iq4ZKLqVRmiuYZsBiM5sB5BUVuvsFMYtKRKSCePHLVTz4zmIGHduUZ4elUa+mxkaVVml+Ug/GOggRkYrG3RkxZQWPTFzG2T1a8sRVx1OzelK8w0ooh0ww7v6pmR0DdHb3j8ysDqCfsohUWu7O/7y3hGc/X8XQPm3466W9qZ6kqV8O1yF/YmZ2E/Am8M9QlAL8XyyDEhGJl4JC5zf/t5BnP1/FsEHH8NhlfZRcjlBpmshuB/oD0wHcfbmZtYhpVCIicZCXX8Cdr87hw8WbuPW0jvzqnK6YaV6xI1WaBJPn7vuKfshmVp3IfTAiIpXG3v0F3PHqbD5aksUDP+zB9Sd1iHdICa80132fmtmvgdpmdhbwBvDOoXYys5FmlmVmC6PKHjSzTDObG17nRq2718wyzGyZmZ0TVT44lGWY2T1R5R3MbHoof83MkkN5zfA5I6xPLc0PQkSqrtzd+xk+aiYfLcniQSWXMlOaBHMPsBlYAPwYeM/d7yvFfi8Cg4spf8zd+4TXewBm1gO4AugZ9hlhZklmlgQ8BQwBegBXhm0B/hSO1QnYCgwP5cOBraH8sbCdiEix1m/dzcX/+IoZq3L4y6W9uU7JpcyUJsE86O7Puvul7n4JMNLMXjnUTu7+GZBTyjiGAmPdPc/dVwEZRPp9+gMZ7r7S3fcBY4GhFmmvO4PI4AOAUcCFUccaFZbfBM40NaKKSDEWZubyoxFfsWn7XkbfMIBLTmgb75AqldIkmHZmdi9AaIZ6C1h+FOe8w8zmhya0xqEsBVgXtc36UHaw8qbANnfPP6D8344V1ueG7f+Dmd1sZulmlr558+ajqJKIJJrJy7K47J9TSU6qxlu3nsigjsX+mZCjUJoEcwPwvZBk3gU+dfcHj/B8TxOZOLMPsAH46xEep0y4+zPunubuac2bN49nKCJSjsbOWMuNo9Lp0Kwub992Il1a1o93SJXSQUeRmVnfqI9/J3IfzJdEOv37uvvswz2Zu2+KOv6zRBIWQCbQLmrTtqGMg5RnA43MrHq4SonevuhY68OIt4ZhexGp4tydRyd9zROfZHBql+Y8dXVfTf0SQyX9ZA+8uthKpKP9r0SGKZ9xuCczs9buviF8/BGRiTQBJgCvmtmjQBugMzADMKCzmXUgkjiuAK5ydzezycAlRPplhgHjo441DJga1n+imaBFZM++Au5+az4T5n3DFf3a8fsLe1FDN1DG1EETjLuffjQHNrMxwGlAMzNbDzwAnGZmfYgkqNVERqXh7ovM7HVgMZAP3O7uBeE4dwATiUxPM9LdF4VT3A2MNbM/AHOA50P588BLZpZBZJDBFUdTDxFJfLl79nPjqJmkr9nKL8/pym2nddQNlOXADvWfezNrSCQ5fD8UfQo85O65MY6tXKWlpXl6enq8wxCRMrYuZzfXvTCDtTm7eezyPpx/XJt4h1SpmNksd08rbl1prg9HAjuAy8JrO/BC2YUnIhIbCzNzuejpr9i8I4/RNwxQcilnpend6ujuF0d9/p2ZzY1VQCIiZWH++m0MGzmD5OqRYcidNVKs3JXmCmaPmZ1c9MHMTgL2xC4kEZGjM2VZFlc8M406ydUZe/MgJZc4Kc0VzC3A6NAXA5HRZMNiF5KIyJHJLyjk8Y+X8+TkDLq2asCo6/vRooEebxwvpUkw2929t5k1AHD37WHYsIhIhbFnXwG3vzqbT5ZmcVHfFP5wYS/qJOsel3gqzU//LaCvu2+PKnsTOCE2IYmIHJ5N2/dyy8uzmLduG3/8US+uHnBMvEMSSr6TvxuR2Y0bmtlFUasaALrmFJEKYcaqHG57ZRa78goYcXVfBvdqHe+QJCjpCqYrcD7QCPhhVPkO4KZYBiUiUhpjZqzl/vELadu4Dq/eNFBzilUwJd3JPx4Yb2aD3H1qOcYkIlKi3f
vy+c24hbw9J5NTuzTn8SuPp2HtGvEOSw5wyD4YJRcRqUi27Mxj+IszmZ+Zy11ndubOMzpRXXOKVUgaYiEiCWPqimx+8cY8snfl8ew1afygR8t4hyQlOGjaN7O7wvtJ5ReOiMh/Kih0/vbR11z13DSSq1fjtZsHKbkkgJKuYK4n8hyYJ4C+JWwnIhIz63J28/M35jFjVQ4XHZ/CH36k+1sSRUnf0hIzWw60MbP5UeUGuLsfF9vQRKSq+3DRRn7xxjwKHf5yaW8u7puiafYTSEmjyK40s1ZEnsVyQfmFJCJV3d79Bfzve0sYNXUNvVIaMOKqE2jftE68w5LDVOJ1prtvBHqbWTLQJRQvc/f9MY9MRKqkFZt3cserc1iyYTvDT+7ArwZ3pWb1pHiHJUfgkA2ZZnYqMJrIEygNaGdmw9z9sxjHJiJViLvz6oy1PPTOYuokJ/H8sDTO7K6O/ERWmp6yR4Gz3X0ZgJl1AcaguchEpIzk7t7PL96cx6TFmzilczP+cmlvWmoW5IRXmgRToyi5ALj712amW2ZFpEwsWJ/Lba/OYsO2vfzmvO7ccFIHqlVTR35lUJoEk25mzwEvh89XA3p4vYgcFXfn5Wlr+P27S2hWL5nXbxlE3/aN4x2WlKHSJJhbgduB/w6fPwdGxCwiEan0dublc+/bC3hn3jec1rU5j13Wh8Z1k+MdlpSx0sxFlkekH+bR2IcjIpXdso07uPWVWazesotfntOVW0/tqCaxSkq3w4pIuXB33py1nt+OX0i9mjV4+cYBnNixWbzDkhhSghGRmFu+aQcPvrOILzOyGXhsEx6/8nha1NcoscpOCUZEYmb3vnz+/vFynvt8FXWTk/jdBT25ekB7Ta9fRZT0yOR3AD/YenfX9DEiclDTVmbzyzfnsS5nD5elteXuwd1oWq9mvMOSclTSFcxfwvtFQCu+G6Z8JbAplkGJSOLas6+AP09cygtfrqZ9kzqMuWkggzo2jXdYEgclTXb5KYCZ/dXd06JWvWNmug9GRP7DzNU5/PKNeazO3s2wQcdw95Bumlq/CivNN1/XzI5195UAZtYBqBvbsEQkkWzfu5+/f7ScF75cRZtGtXXVIkDpEsxPgSlmtpLIZJfHAD+OaVQikhDcnXfmb+ChdxaTvSuPK/u359fndqdeTV21SOlutPzAzDoD3ULR0nDzpYhUYWuzd/Pb8Qv59OvNHNe2ISOvS+O4to3iHZZUIAcdK2hmZ4T3i4DzgI7hdV4oK5GZjTSzLDNbGFXWxMwmmdny8N44lJuZPW5mGWY238z6Ru0zLGy/3MyGRZWfYGYLwj6PW3jM3cHOISJlo6DQee7zlZzzt8+YtWYrvz2/B2/feqKSi/yHkgajnxref1jM6/xSHPtFYPABZfcAH7t7Z+Dj8BlgCNA5vG4GnoZIsgAeAAYA/YEHohLG08BNUfsNPsQ5ROQoLdu4g4uf/oo//GsJgzo25cOffp/hJ3fQfS1SrJJGkT0Q3q8/kgO7+2dmlnpA8VDgtLA8CpgC3B3KR7u7A9PMrJGZtQ7bTnL3HAAzmwQMNrMpQAN3nxbKRwMXAu+XcA4ROUJ5+QWMmLyCEVMyqF+rBn+/og8X9G5DaDgQKVZ598S1dPcNYXkjUPS4uhRgXdR260NZSeXriykv6Rz/wcxuJnLFRPv27Q+3LiJVwvSV2TwwYRFLN+5gaJ823H9+D90wKaUSt6Ee7u5mdtCZAsrjHO7+DPAMQFpaWkxjEUk0Wdv38vAHS3l7diatG9bSI4zlsJV3gtlkZq3dfUNoAssK5ZlAu6jt2oayTL5r7ioqnxLK2xazfUnnEJFS+mjxJn711nx27s3n9tM7cucZnalVIyneYUmCKWkushJHirn720dwvgnAMODh8D4+qvwOMxtLpEM/NySIicD/RHXsnw3c6+45ZrbdzAYC04FrgScOcQ4ROYTte/fzx3eX8Fr6Orq1qs/rPx5Epxb14h2WJKiSrmB+GN5bACcCn4
TPpwNfASUmGDMbQ+Tqo5mZrScyGuxh4HUzGw6sAS4Lm78HnAtkALuB6wFCIvk9MDNs91BRhz9wG5GRarWJdO6/H8oPdg4ROQh35/2FG7l//CKyd+Vxy6kd+dlZXUiurtFhcuQsMnCrhA3MPgSGFXWch2anF939nHKIr9ykpaV5erqmWJOqJ3f3fh6YsJD/m/sNPds04OGLjuN7bRvGOyxJEGY264D5Kr9Vmj6YdlGjsiAyk7KGXIlUAu/O/4YHxi9i25793HVmZ+48o5PuaZEyU5oE83HoCxkTPl8OfBS7kEQk1tbl7OZ37yzmoyWb6N22IaNu6E+vFF21SNkqzVxkd4QO/1NC0TPuPi62YYlILOzLL+TZz1fy5CcZVDO4Z0g3btSd+BIjpRqmHEaMHcmoMRGpANydSYs38fAHS1m5eRdn92jJgxf0pE2j2vEOTSqxkoYp76D4RyYbkXsYG8QsKhEpM19v2sGDExbx1YpsOjavqxsmpdyUNBdZ/fIMRETK1o69+3nykwxGfrmKujWr89DQnlzZvz011Bwm5aRUTWRm1pvv+mA+c/f5sQtJRI7Wx0s28etxC8jakcfFfdty75Bumj9Myt0hE4yZ3UVkWvyiPphXzOwZd3+ihN1EJA5WbN7JH/+1hE+WZtG1ZX3+eU0afdrpOS0SH6W5ghkODHD3XQBm9idgKt9NzSIicZa7ez9/+/hrXpq6hto1kvj1ud0YdmIqNatr/jCJn9IkGAMKoj4XhDIRqQAmLd7Er96cR+6e/VzRvz0/O6sLzdQcJhVAaRLMC8B0Myu69+VC4PnYhSQipbErL59HJi7jxa9W0yulAa/eNJDurTW4UyqO0txo+Wh4guTJoeh6d58T06hE5KAKC51xczL5y4fL2JC7l2sHHcN953VXc5hUOKW90XI2MDvGsYjIIXy1Ygt//NcSFn2znePaNuSJK48nLbVJvMMSKVbcnmgpIqWXkbWTh99fwkdLskhpVJu/X9GHHx7XhmrV1B0qFZcSjEgFlr0zj799tJxXZ6ylTo0k7h7cjetPStXTJSUhKMGIVEB79xcw8stVjJi8gj37C7iqf3t+8oPOullSEooSjEgF4u6Mn/sNj0xcRua2PfygewvuGdJdjy2WhKQEI1IBuDuffr2Z/31vKcs27aBXSgMeufQ4TuzYLN6hiRwxJRiROFuYmct94xYwb30u7ZvU4bHLezO0d4o68CXhKcGIxEnmtj08/P5S3p3/Dc3q1eRPF3+PoX1S1IEvlYYSjEg527OvgGc/X8k/Pl1BoTu3nNqRW77fkYZ1asQ7NJEypQQjUk5y9+xn1FerGT11DVt25jG4ZyvuO6877ZrUiXdoIjGhBCMSY/sLChn11WqenJzBtt37Ob1rc249rRP9O+gOfKnclGBEYsTdmbR4Ew+9u5j1W/dwSudm3D24G71SGsY7NJFyoQQjEgPz1m3jkYnL+CJjC11b1mfkdWmc3rUFZhoZJlWHEoxIGXF3Plu+hRGTM5i+KofGdWrw2/N7cO2gY6iRVC3e4YmUOyUYkTKwfNMO7h+/iKkrs0lpVJt7h3TjqgHtqV9LI8Ok6lKCETkKmdv28PhHy3lj1jrq16rB7y7oyZX925NcXVcsIkowIkcgc9seRkzO4PX0dQBcOyiVO8/opMkoRaIowYgchtVbdvHPz1by5qxIYrksrR23nd6JlEa14xyZSMUTlwRjZquBHUABkO/uaWbWBHgNSAVWA5e5+1aLDLv5O3AusBu4LjxhEzMbBvwmHPYP7j4qlJ8AvAjUBt4D7nJ3L5fKSaWUvTOPxz76mrEz1lGtmimxiJRCPK9gTnf3LVGf7wE+dveHzeye8PluYAjQObwGAE8DA0JCegBIAxyYZWYT3H1r2OYmYDqRBDMYeL98qiWVyd79BTz/xSqenvLdc1nuPLMTLerXindoIhVeRWoiGwqcFpZHAVOIJJihwOhwBTLNzBqZWeuw7SR3zwEws0nAYDObAj
Rw92mhfDRwIUowchj27Cvglelr+OdnK9m8I4+zerTk7sHd9FwWkcMQrwTjwIdm5sA/3f0ZoKW7bwjrNwItw3IKsC5q3/WhrKTy9cWU/wczuxm4GaB9+/ZHUx+pJPbuL+DlaWt4esoKsnft48SOTXnyyuMZcGzTeIcmknDilWBOdvdMM2sBTDKzpdEr3d1D8ompkNieAUhLS1MfTRW2L7+Q19PX8eQnGWzcvpeTOjXlJz/oQr9UzRcmcqTikmDcPTO8Z5nZOKA/sMnMWrv7htAElhU2zwTaRe3eNpRl8l2TWlH5lFDetpjtRf5DXn4BE+Z+w1OTM1idvZt+qY157PI+DOqoKxaRo1XuCcbM6gLV3H1HWD4beAiYAAwDHg7v48MuE4A7zGwskU7+3JCEJgL/Y2aNw3ZnA/e6e46ZbTezgUQ6+a8Fniiv+kli2JC7h7Ez1vHK9LVs2ZlHl5b1eOH6fpzWpbnmCxMpI/G4gmkJjAv/iKsDr7r7B2Y2E3jdzIYDa4DLwvbvERminEFkmPL1ACGR/B6YGbZ7qKjDH7iN74Ypv486+CXI3LaHv036mrdmr8eB07o0Z/jJx3JSp6ZKLCJlzHR7SERaWpqnp6fHOwyJkYysHYyYvILx876hmsE1A1O57sRU2jfVw75EjoaZzXL3tOLWVaRhyiJlyt2ZsSqHZz9fycdLs6hZvRrDBqUy/JQOukFSpBwowUilU1jofLh4E09/uoJ567bRpG4yd5zeietP6kCTusnxDk+kylCCkUpjz74CXpu5lhe/Ws3q7N20b1KH3w/tySUntKN2clK8wxOpcpRgJOFl78zj1elrGTV1DVt25nHCMY352dldObdXK6rrQV8icaMEIwlrbfZuRn65ijEz1pKXX8j3uzTnttM6MlB33YtUCEowknBmrcnh2c9W8eHijVQz44I+bbjttI50alE/3qGJSBQlGEkI+QWFTFy0iee+WMmctdtoUKs6Pz61I8MGpdKqoWY2FqmIlGCkQtu6ax+vpa/j5WlrWL91D8c0rcNDQ3tycd+21K2pX1+Rikz/QqXCcXdmrdnK2JnreGfeN+TlF9IvtTG/Oa8HZ/VoSVI13XEvkgiUYKTC2L0vn7dnZzJ66mq+3laMDiAAAA0GSURBVLSTuslJXHxCW647MZUuLdW/IpJolGAk7lZv2cWYGWsZM2Mt2/fm0yulAX+++DjOO661msFEEpj+9Upc7Msv5INFGxkzfS1TV2aTVM0Y3LMVN5ycSt/2jTXxpEgloAQj5WpN9i7GzFjHm7PWsWXnPto1qc3Pz+rCZf3a0bKBRoOJVCZKMBJz+wsKmbR4E2NmrOXz5VtIqmac0a0FV/Vvz6ldmlNNnfYilZISjMTM2uzdjJ25ltfT17NlZx5tGtbiZ2d14bK0drp3RaQKUIKRMrVnXwGTl2V9e7VSzeCMbi25akA7Tu3SQkOMRaoQJRg5aoWFzozVOYybncm/FmxgZ14+bRrW4qc/6MJl/drSuqGevSJSFSnByBFbsXkn42ZnMm5OJpnb9lA3OYnBvVrzo+NTGNSxqa5WRKo4JRg5LFnb9/LO/A1MmJvJvPW5VDM4uXNzfjW4K2f1aEmdZP1KiUiE/hrIIeXu3s/7CzcwYd43TF2ZjTv0SmnAfed2Z2ifNrTQ8GIRKYYSjBRr8448Ply8kQ8WbmTqimzyC51jm9XlztM7cUGfFDq1qBfvEEWkglOCkW99s20PHyyMJJWZa3Jwh2Oa1mH4KR04t1drjmvbUHfYi0ipKcFUcau27ApJZQPz1ucC0K1Vff77jM4M+V4rurasr6QiIkdECaaKcXeWbNjBR0s28d6CDSzduAOA3m0bcvfgbpzTsyXHNlfzl4gcPSWYKmDv/gK+zNjC5GVZfLwkiw25ewHon9qE+8/vwTm9WpHSSPeqiEjZUoKppFZv2cUXGVuYvDSLL1dsYe/+QuokJ3FSp2b89KwunNqluSaXFJGYUoKpJAoLnbnrtz
Fl2WY+XLTx26avlEa1uaJfe87o1oIBxzahZvWkOEcqIlWFEkwCy96Zx7SVOUxZlsWUrzezeUce1QxOOKYx95/fg9O7tSC1aR110otIXCjBJJCsHXuZvjKHaSuzmb4qh4ysnQA0qFWdU7o056zuLTmta3Ma1UmOc6QiIkowFdrG3L0hmWQzfWUOK7fsAqBuchJpqU24qG8KAzo0pXfbhlRPqhbnaEVE/p0STAVRUOis2rKTWWu2Mm1lDnPWbmV19m4A6tesTr8OTbi8XzsGHtuUnm0aKKGISIVXaROMmQ0G/g4kAc+5+8NxDulb+/ILWbF5Jwsyc1mUmcvCb7azdMN2du0rAKBZvWT6tm/M1QOOYVDHpnRv3UAzE4tIwqmUCcbMkoCngLOA9cBMM5vg7otjed7d+/LZsmMfW3blsWVHHtm79pG9M48tO/exZWceW3bmkbUjj7XZu8kvdCDS3NWzTUMuTWtHzzYNOL59Izo2r6eOeRFJeJUywQD9gQx3XwlgZmOBoUCZJ5jHP17OG7PWsWXHPvbsLyh2m/o1q9Osfk2a1k2mS4v6DO7Ziq6t6tMrpSEdmtbVM+lFpFKqrAkmBVgX9Xk9MODAjczsZuBmgPbt2x/RiVo2qMkJ7RvTtF5NmtWrSdN6yTQP783q1aRJ3WRq1dC9JyJS9VTWBFMq7v4M8AxAWlqaH8kxLu/Xnsv7HVlyEhGpzCrrUKRMoF3U57ahTEREykllTTAzgc5m1sHMkoErgAlxjklEpEqplE1k7p5vZncAE4kMUx7p7oviHJaISJVSKRMMgLu/B7wX7zhERKqqytpEJiIicaYEIyIiMaEEIyIiMaEEIyIiMWHuR3R/YaVjZpuBNUe4ezNgSxmGE0+qS8VTWeoBqktFdTR1Ocbdmxe3QgmmDJhZurunxTuOsqC6VDyVpR6gulRUsaqLmshERCQmlGBERCQmlGDKxjPxDqAMqS4VT2WpB6guFVVM6qI+GBERiQldwYiISEwowYiISEwowRwlMxtsZsvMLMPM7ol3PMUxs9VmtsDM5ppZeihrYmaTzGx5eG8cys3MHg/1mW9mfaOOMyxsv9zMhpVT7CPNLMvMFkaVlVnsZnZC+NlkhH1j9vzqg9TlQTPLDN/NXDM7N2rdvSGuZWZ2TlR5sb9z4fEU00P5a+FRFbGoRzszm2xmi81skZndFcoT7nspoS6J+L3UMrMZZjYv1OV3JZ3fzGqGzxlhfeqR1vGg3F2vI3wReRTACuBYIBmYB/SId1zFxLkaaHZA2Z+Be8LyPcCfwvK5wPuAAQOB6aG8CbAyvDcOy43LIfbvA32BhbGIHZgRtrWw75ByrsuDwC+K2bZH+H2qCXQIv2dJJf3OAa8DV4TlfwC3xqgerYG+Ybk+8HWIN+G+lxLqkojfiwH1wnINYHr4GRZ7fuA24B9h+QrgtSOt48FeuoI5Ov2BDHdf6e77gLHA0DjHVFpDgVFheRRwYVT5aI+YBjQys9bAOcAkd89x963AJGBwrIN098+AnFjEHtY1cPdpHvmXNTrqWOVVl4MZCox19zx3XwVkEPl9K/Z3LvwP/wzgzbB/9M+lTLn7BnefHZZ3AEuAFBLweymhLgdTkb8Xd/ed4WON8PISzh/9fb0JnBniPaw6lhSTEszRSQHWRX1eT8m/nPHiwIdmNsvMbg5lLd19Q1jeCLQMywerU0Wqa1nFnhKWDywvb3eEpqORRc1KHH5dmgLb3D3/gPKYCs0qxxP533JCfy8H1AUS8HsxsyQzmwtkEUnYK0o4/7cxh/W5Id4y+xugBFM1nOzufYEhwO1m9v3oleF/iQk5Xj2RYw+eBjoCfYANwF/jG07pmVk94C3gJ+6+PXpdon0vxdQlIb8Xdy9w9z5AWyJXHN3iGY8SzNHJBNpFfW4byioUd88M71nAOCK/eJtCUwThPStsfrA6VaS6llXsmWH5wPJy4+6bwh+FQuBZIt8NHH5dsok0PVU/oDwmzKwGkT/Ir7
j726E4Ib+X4uqSqN9LEXffBkwGBpVw/m9jDusbhnjL7G+AEszRmQl0DqM0kol0lE2Ic0z/xszqmln9omXgbGAhkTiLRu0MA8aH5QnAtWHkz0AgNzR7TATONrPGobng7FAWD2USe1i33cwGhrbna6OOVS6K/iAHPyLy3UCkLleEkT4dgM5EOr6L/Z0LVwyTgUvC/tE/l7KO2YDngSXu/mjUqoT7Xg5WlwT9XpqbWaOwXBs4i0if0sHOH/19XQJ8EuI9rDqWGFRZj2Soai8iI2S+JtLWeV+84ykmvmOJjPaYBywqipFIW+vHwHLgI6BJKDfgqVCfBUBa1LFuINLhlwFcX07xjyHSRLGfSJvv8LKMHUgj8sdjBfAkYXaLcqzLSyHW+eEfa+uo7e8LcS0jahTVwX7nwnc9I9TxDaBmjOpxMpHmr/nA3PA6NxG/lxLqkojfy3HAnBDzQuD+ks4P1AqfM8L6Y4+0jgd7aaoYERGJCTWRiYhITCjBiIhITCjBiIhITCjBiIhITCjBiIhITCjBiJQTM0szs8cPsm61mTU7jGNdamZLzGzyAeWpZnZVKfa/zsyeLO35RI6EEoxIOXH3dHf/7zI63HDgJnc//YDyVOCQCUakPCjBiAThf/9LzOxZizxP48NwRzRm1tHMPggThn5uZt3CxIKrwh3qjcysoGieNzP7zMw6H3D808zs3bDcNBx/kZk9R+RmxOJiutIiz0VZaGZ/CmX3E7lB8Hkze+SAXR4GTrHIM0x+apFnhLwQjjHHzA5MSJjZeWY21cyamdnZYXm2mb1hkTm6iq6wfhfKF5hZXOe4ksSgBCPy7zoDT7l7T2AbcHEofwa4091PAH4BjHD3AiJ3Ovcg8gd/NpE/7jWBdu6+vITzPAB8Ec4zDmh/4AZm1gb4E5Hp1vsA/czsQnd/CEgHrnb3Xx6w2z3A5+7ex90fA24nMvfk94ArgVFmVivqHD8K+xQ9UOs3wA88MjlqOvCzqGNvCeVPh5+BSImqH3oTkSpllbvPDcuzgNTwv/gTgTfsuwcr1gzvnxN5kFgH4H+Bm4BPiczbVJLvAxcBuPu/zGxrMdv0A6a4+2YAM3sl7Pd/h1Gfk4EnwnmWmtkaoEtYdwaRKVnOdvftZnY+kWT5ZahnMjA16lhFk1rOKopdpCRKMCL/Li9quQCoTeRKf5tHpkE/0GfArUAb4H7gl8BpRBJPRVf0dMIuRK5WjMgDwK48yPZFP5sC9LdDSkFNZCKH4JHng6wys0vh22fM9w6rZxC5uil0971EJkv8MZHEU5LPCJ3xZjaEyCODDzQDODX0jSQRaeL69BDH3UHk0b9FPgeuDufpQqQpbllYt4ZIE+BoM+sJTANOMrNOYfu6YR+RI6IEI1I6VwPDzaxoVuqhAO6eR+Qpf9PCdp8T+QO/4BDH+x3wfTNbRKS5ae2BG3hk2vp7iEy3Pg+Y5e6Hmup9PlBgZvPM7KfACKCamS0AXgOuCzEXnWNpqNsbQAPgOmCMmc0n0jymznw5YppNWUREYkJXMCIiEhNKMCIiEhNKMCIiEhNKMCIiEhNKMCIiEhNKMCIiEhNKMCIiEhP/D3XNhJEm46cWAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [], | |
"needs_background": "light" | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "IaaCyAPlomLt" | |
}, | |
"source": [ | |
"### Update the embeddings" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "k-BNn3R6R0lY" | |
}, | |
"source": [ | |
"import torch" | |
], | |
"execution_count": 43, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "P5033SckRzzo" | |
}, | |
"source": [ | |
"new_size = len(kept_ids)\n", | |
"new_emb = torch.nn.Embedding(new_size, model.shared.embedding_dim)\n", | |
"new_head = torch.nn.Linear(in_features=model.lm_head.in_features, out_features=new_size, bias=False)" | |
], | |
"execution_count": 44, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "CjD6LS_9fe_M" | |
}, | |
"source": [ | |
"for new_id, old_id in enumerate(kept_ids):\n", | |
" new_emb.weight.data[new_id] = model.shared.weight.data[old_id]\n", | |
" new_head.weight.data[new_id] = model.lm_head.weight.data[old_id]" | |
], | |
"execution_count": 45, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vv7IuBORRseE" | |
}, | |
"source": [ | |
"model.shared.weight = new_emb.weight\n", | |
"model.lm_head.weight = new_head.weight" | |
], | |
"execution_count": 46, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "QcIDtmymo56s" | |
}, | |
"source": [ | |
"The new model has 244M parameters - 42% of the original size. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "g_aPBQ20kvCB", | |
"outputId": "d2289964-728d-45b2-afc9-e366e1d6b98b" | |
}, | |
"source": [ | |
"print(msize(model), msize(model) / original_size)" | |
], | |
"execution_count": 48, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"244309248 0.4194861110195362\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "vdKmFJY_k7xZ" | |
}, | |
"source": [ | |
"### Update the tokenizer" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "-X25sG0jmc83" | |
}, | |
"source": [ | |
"T5 uses the SentencePiece tokenizer, which is implemented in C++ and is opaque to Python. \n", | |
"\n", | |
"Fortunately, we can download the Protobuf schema of its model and use it to load and modify the model in Python. \n", | |
"\n", | |
"https://github.com/google/sentencepiece/issues/121" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OpII_eX3mY80" | |
}, | |
"source": [ | |
"!wget https://raw.githubusercontent.com/google/sentencepiece/master/src/sentencepiece_model.proto" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "SGb1DiYmpnkr" | |
}, | |
"source": [ | |
"We compile the protobuf description of the sentencepiece model in order to be able to modify it. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "I6B0MA5DmaZM" | |
}, | |
"source": [ | |
"! protoc --python_out=. sentencepiece_model.proto" | |
], | |
"execution_count": 51, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "nJwHRRzbngJY" | |
}, | |
"source": [ | |
"Now we can serialize the model used by the current tokenizer and open it as a protobuf class. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 117, | |
"referenced_widgets": [ | |
"b432b120350b42388447dcfcf959d673", | |
"5660d17e6c0e40b7bee6dc3d1af46f34", | |
"9dcf8c4bef5342aba837077c4904d852", | |
"d9a80cc59ada42fe908726abd05942bf", | |
"847d7a0c9f4a418e93484cd9bf8b8e0a", | |
"6c74fe4ffc4f4013bf73d00212e5775a", | |
"5a17271685624ad3bd2aa9c88504f969", | |
"1780d72d89b5449eaa0dcea9c595b6d7" | |
] | |
}, | |
"id": "MdQM0L3lnybA", | |
"outputId": "aa4db10b-f8c2-48fe-8e67-2afa6586550c" | |
}, | |
"source": [ | |
"import sentencepiece_model_pb2 as spmp\n", | |
"smp = tokenizer.sp_model.serialized_model_proto()\n", | |
"m = spmp.ModelProto()\n", | |
"m.ParseFromString(smp)\n", | |
"\n", | |
"print('the loaded model has pieces:', len(m.pieces))\n", | |
"new_pieces = [m.pieces[idx] for idx in kept_ids]\n", | |
"print('the new pieces:', len(new_pieces))\n", | |
"\n", | |
"# replace the content of the first 30K pieces\n", | |
"for i, p in enumerate(new_pieces):\n", | |
" m.pieces[i].piece = p.piece\n", | |
" m.pieces[i].score = p.score\n", | |
" m.pieces[i].type = p.type\n", | |
"\n", | |
"# drop the remaining pieces\n", | |
"n = len(new_pieces)\n", | |
"for i in trange(len(m.pieces) - n):\n", | |
" m.pieces.pop(len(m.pieces) - 1)\n", | |
"\n", | |
"print(len(m.pieces))\n", | |
"with open('new_sp.model', 'wb') as f:\n", | |
" f.write(m.SerializeToString())" | |
], | |
"execution_count": 56, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"the loaded model has pieces: 250100\n", | |
"the new pieces: 30000\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "b432b120350b42388447dcfcf959d673", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=220100.0), HTML(value='')))" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"30000\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qWeP6N1sry93" | |
}, | |
"source": [ | |
"new_tokenizer = T5Tokenizer('new_sp.model', extra_ids=0)" | |
], | |
"execution_count": 78, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "czfXG1IqsDT4" | |
}, | |
"source": [ | |
"### Save the model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "oanCNPiIsCdU", | |
"outputId": "574a65e7-a1f4-465b-aa28-f6b2d9990200" | |
}, | |
"source": [ | |
"model.config.__dict__['vocab_size'] = new_size\n", | |
"model.config.__dict__['_name_or_path'] = 'cointegrated/rut5-base'\n", | |
"model.config" | |
], | |
"execution_count": 79, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"T5Config {\n", | |
" \"_name_or_path\": \"cointegrated/rut5-base\",\n", | |
" \"architectures\": [\n", | |
" \"T5ForConditionalGeneration\"\n", | |
" ],\n", | |
" \"d_ff\": 2048,\n", | |
" \"d_kv\": 64,\n", | |
" \"d_model\": 768,\n", | |
" \"decoder_start_token_id\": 0,\n", | |
" \"dropout_rate\": 0.1,\n", | |
" \"eos_token_id\": 1,\n", | |
" \"feed_forward_proj\": \"gated-gelu\",\n", | |
" \"initializer_factor\": 1.0,\n", | |
" \"is_encoder_decoder\": true,\n", | |
" \"layer_norm_epsilon\": 1e-06,\n", | |
" \"model_type\": \"t5\",\n", | |
" \"num_decoder_layers\": 12,\n", | |
" \"num_heads\": 12,\n", | |
" \"num_layers\": 12,\n", | |
" \"output_past\": true,\n", | |
" \"pad_token_id\": 0,\n", | |
" \"relative_attention_num_buckets\": 32,\n", | |
" \"tie_word_embeddings\": false,\n", | |
" \"tokenizer_class\": \"T5Tokenizer\",\n", | |
" \"transformers_version\": \"4.5.1\",\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 30000\n", | |
"}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 79 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "UaebisNqr4Mk" | |
}, | |
"source": [ | |
"new_tokenizer.save_pretrained('rut5-base')\n", | |
"model.save_pretrained('rut5-base')" | |
], | |
"execution_count": 81, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "nIoB98_9r7VU", | |
"outputId": "b7d1858d-d51f-4cc6-ecc2-86bc59f6a36d" | |
}, | |
"source": [ | |
"!ls rut5-base -alsh" | |
], | |
"execution_count": 82, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"total 933M\n", | |
"4.0K drwxr-xr-x 2 root root 4.0K Apr 30 21:26 .\n", | |
"4.0K drwxr-xr-x 1 root root 4.0K Apr 30 21:26 ..\n", | |
"4.0K -rw-r--r-- 1 root root 677 Apr 30 21:33 config.json\n", | |
"933M -rw-r--r-- 1 root root 933M Apr 30 21:33 pytorch_model.bin\n", | |
"4.0K -rw-r--r-- 1 root root 65 Apr 30 21:33 special_tokens_map.json\n", | |
"812K -rw-r--r-- 1 root root 809K Apr 30 21:33 spiece.model\n", | |
"4.0K -rw-r--r-- 1 root root 116 Apr 30 21:33 tokenizer_config.json\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "5gFLD5dUs7gZ" | |
}, | |
"source": [ | |
"Now try to load the model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ewebox5usyq9" | |
}, | |
"source": [ | |
"model1 = T5ForConditionalGeneration.from_pretrained('rut5-base')\n", | |
"tokenizer1 = T5Tokenizer.from_pretrained('rut5-base')" | |
], | |
"execution_count": 83, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "_GVnO2C0ruQx" | |
}, | |
"source": [ | |
"The model has not been fine-tuned on any sensible task except filling the gaps. And even this task is performed strangely - the model continues generating when it should have stopped. \n", | |
"\n", | |
"We hope that it will get better after fine-tuning, but that is the topic of the next story." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "08zibfjgtNhF", | |
"outputId": "9e9f2025-54d0-4a21-ede0-a023c38383b7" | |
}, | |
"source": [ | |
"inputs = tokenizer1('The <extra_id_0> walks in <extra_id_1> park.', return_tensors='pt')\n", | |
"with torch.no_grad():\n", | |
" hypotheses = model1.generate(\n", | |
" **inputs, \n", | |
" do_sample=True, top_p=0.95, \n", | |
" num_return_sequences=3, \n", | |
" repetition_penalty=2.5,\n", | |
" max_length=32,\n", | |
" )\n", | |
"for h in hypotheses:\n", | |
" print(tokenizer1.decode(h))" | |
], | |
"execution_count": 88, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"<pad> <extra_id_0> evening on <extra_id_1> the <extra_id_2> the park</s> <pad> <pad> <pad>\n", | |
"<pad> <extra_id_0> Great <extra_id_1> the <extra_id_2> a <extra_id_3> nature center,</s> <pad>\n", | |
"<pad> <extra_id_0> forest <extra_id_1> this <extra_id_2> a <extra_id_3> summer in the...</s>\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "tsR9lH3_uqF3", | |
"outputId": "a6f14551-247b-45e1-e3a9-fac939079322" | |
}, | |
"source": [ | |
"inputs = tokenizer1('Красивая <extra_id_0> гуляет <extra_id_1> парку.', return_tensors='pt')\n", | |
"with torch.no_grad():\n", | |
" hypotheses = model1.generate(\n", | |
" **inputs, \n", | |
" do_sample=True, top_p=0.95, \n", | |
" num_return_sequences=3, \n", | |
" repetition_penalty=2.5,\n", | |
" max_length=32,\n", | |
" )\n", | |
"for h in hypotheses:\n", | |
" print(tokenizer1.decode(h))" | |
], | |
"execution_count": 89, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"<pad> <extra_id_0> птица <extra_id_1> в <extra_id_2>, <extra_id_3>. Гул <extra_id_4>! Красивый <extra_id_5> молодец</s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>\n", | |
"<pad> <extra_id_0> музыка <extra_id_1> в <extra_id_2> в <extra_id_3> осеннее платье в <extra_id_4> в <extra_id_5> и</s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>\n", | |
"<pad> <extra_id_0> женщина, она <extra_id_1> по <extra_id_2> в <extra_id_3>. Красивый <extra_id_39>! Настроение - красиво во всем лесном</s>\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "2nZt98FYwcex" | |
}, | |
"source": [ | |
"I will save the model to my Google Drive so that I can retrieve it later for fine-tuning. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "iMG9dNShwg9U", | |
"outputId": "9a9de585-085e-47a2-8e03-068a95e6857f" | |
}, | |
"source": [ | |
"from google.colab import drive\n", | |
"drive.mount('/gd')" | |
], | |
"execution_count": 91, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Mounted at /gd\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "_j56QoXBwjCS", | |
"outputId": "c3df1fb9-49b6-4fbd-94c6-b0ffc38e2fca" | |
}, | |
"source": [ | |
"model1.save_pretrained('/gd/MyDrive/models/rut5-base-raw')\n", | |
"tokenizer1.save_pretrained('/gd/MyDrive/models/rut5-base-raw')" | |
], | |
"execution_count": 92, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"('/gd/MyDrive/models/rut5-base-raw/tokenizer_config.json',\n", | |
" '/gd/MyDrive/models/rut5-base-raw/special_tokens_map.json',\n", | |
" '/gd/MyDrive/models/rut5-base-raw/spiece.model',\n", | |
" '/gd/MyDrive/models/rut5-base-raw/added_tokens.json')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 92 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "57AlTaqpw2Ew" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment