Last active
September 6, 2020 01:31
-
-
Save neoyipeng2018/cbe64b40ad683ff50b15fb67fa39fabd to your computer and use it in GitHub Desktop.
FineTuningWith 🤗 Trainer.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "FineTuningWith π€ Trainer.ipynb", | |
"provenance": [], | |
"collapsed_sections": [ | |
"3slpjqSp8zsZ", | |
"4l-hhP-GaX_j" | |
], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.4" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"c6a73993a49c432ba584ec488247c016": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_2a083ff521a749e2b684c6bd3005bc13", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_bbcace5b2216435babe5d2e758894022", | |
"IPY_MODEL_637a2c14acab4d84ad9fd9fcd2a110f8" | |
] | |
} | |
}, | |
"2a083ff521a749e2b684c6bd3005bc13": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"bbcace5b2216435babe5d2e758894022": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_1cd5520bc2164cb5aa4e9722ac664438", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 411, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 411, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_224b1f677b2c496bbf3037952287207d" | |
} | |
}, | |
"637a2c14acab4d84ad9fd9fcd2a110f8": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_da0e168573b74ae79336994546cee36d", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "β", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 411/411 [00:00<00:00, 615B/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_9312c749c6cd419f90f15fd8fab2bdb2" | |
} | |
}, | |
"1cd5520bc2164cb5aa4e9722ac664438": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"224b1f677b2c496bbf3037952287207d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"da0e168573b74ae79336994546cee36d": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"9312c749c6cd419f90f15fd8fab2bdb2": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"f7721aa245fd40fcbaecf1fbc6855a39": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_4754b43cca614df7af16afb508c99b8a", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_3d3e2d7609454dae98ec0a85f4e219ea", | |
"IPY_MODEL_b0e7c0e8454e48698b9aff2271a59683" | |
] | |
} | |
}, | |
"4754b43cca614df7af16afb508c99b8a": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"3d3e2d7609454dae98ec0a85f4e219ea": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_ab1af4531e154c5ab80e940e5c8e3746", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 213450, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 213450, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_70b83f9da5744ed0a91d5c9967f6cb7b" | |
} | |
}, | |
"b0e7c0e8454e48698b9aff2271a59683": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_5960e702f1724327942731b84f3ced8f", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "β", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 213k/213k [00:00<00:00, 853kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_2cc334cd9fc04419bcaeaec4f5a0f328" | |
} | |
}, | |
"ab1af4531e154c5ab80e940e5c8e3746": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"70b83f9da5744ed0a91d5c9967f6cb7b": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"5960e702f1724327942731b84f3ced8f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"2cc334cd9fc04419bcaeaec4f5a0f328": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"f3ab3e82cd7b4a85b530576a7e26cde2": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_a060bdc45a754f6aae21badd838475a5", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_532d0411a8a14b27b426663a4e875bf5", | |
"IPY_MODEL_4aa71e4105804cc889cd99a88a478b3b" | |
] | |
} | |
}, | |
"a060bdc45a754f6aae21badd838475a5": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"532d0411a8a14b27b426663a4e875bf5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_3c212742117f4a27aaca29fcde46c709", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 263273408, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 263273408, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_8b3db1af7ec844079405c2a80c88f813" | |
} | |
}, | |
"4aa71e4105804cc889cd99a88a478b3b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_45b546123fec4d64b6e3c55991f68a7f", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "β", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 263M/263M [00:04<00:00, 56.0MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_793c1c8f87ab4138a58d194172632fca" | |
} | |
}, | |
"3c212742117f4a27aaca29fcde46c709": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"8b3db1af7ec844079405c2a80c88f813": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"45b546123fec4d64b6e3c55991f68a7f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"793c1c8f87ab4138a58d194172632fca": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
}, | |
"accelerator": "GPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/neoyipeng2018/cbe64b40ad683ff50b15fb67fa39fabd/finetuningwithtrainer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "BXmJ_IPxb8cn", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"#Fine Tuning a Language Model using π€'s Trainer\n", | |
"In some recent NLP competitions I entered, I noticed the huge benefits of fine tuning a language model before starting to further fine tune for downstream tasks.\n", | |
"\n", | |
"Transformers library has a `Trainer` module which has an end to end train/evaluation loop to fine-tune a transformer model. There are some notebooks/guides but I was looking for a simple example that contains all the basic needs like setting up the dataset, evaluation metrics, tensorboards etc, but couldn't really find one, so I decided to create a one that contains everything I needed to start.\n", | |
"\n", | |
"**References**:\n", | |
"1. https://zablo.net/blog/post/training-roberta-from-scratch-the-missing-guide-polish-language-model/\n", | |
"1. https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/01_how_to_train.ipynb#scrollTo=GlvP_A-THEEl\n", | |
"1. https://skimai.com/roberta-language-model-for-spanish/" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "-skY1JbAbEQ6", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Installing stuffs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3pPYShGjTAxW", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"!pip install transformers\n", | |
"!pip install tokenizers\n", | |
"!pip install tensorboard" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "bZO302pF7mig", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Ingredients for Langugage Modelling\n", | |
"1. Model\n", | |
"1. Dataset\n", | |
"1. Trainer" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "3slpjqSp8zsZ", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"### 1. Initialise Model and Tokenizer\n", | |
"This is pretty straightforward thanks to π€. You essentially just need to choose a model name from https://huggingface.co/models" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OD71y3mYZ-tP", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 182, | |
"referenced_widgets": [ | |
"c6a73993a49c432ba584ec488247c016", | |
"2a083ff521a749e2b684c6bd3005bc13", | |
"bbcace5b2216435babe5d2e758894022", | |
"637a2c14acab4d84ad9fd9fcd2a110f8", | |
"1cd5520bc2164cb5aa4e9722ac664438", | |
"224b1f677b2c496bbf3037952287207d", | |
"da0e168573b74ae79336994546cee36d", | |
"9312c749c6cd419f90f15fd8fab2bdb2", | |
"f7721aa245fd40fcbaecf1fbc6855a39", | |
"4754b43cca614df7af16afb508c99b8a", | |
"3d3e2d7609454dae98ec0a85f4e219ea", | |
"b0e7c0e8454e48698b9aff2271a59683", | |
"ab1af4531e154c5ab80e940e5c8e3746", | |
"70b83f9da5744ed0a91d5c9967f6cb7b", | |
"5960e702f1724327942731b84f3ced8f", | |
"2cc334cd9fc04419bcaeaec4f5a0f328", | |
"f3ab3e82cd7b4a85b530576a7e26cde2", | |
"a060bdc45a754f6aae21badd838475a5", | |
"532d0411a8a14b27b426663a4e875bf5", | |
"4aa71e4105804cc889cd99a88a478b3b", | |
"3c212742117f4a27aaca29fcde46c709", | |
"8b3db1af7ec844079405c2a80c88f813", | |
"45b546123fec4d64b6e3c55991f68a7f", | |
"793c1c8f87ab4138a58d194172632fca" | |
] | |
}, | |
"outputId": "b04d0c84-7a05-4ad5-ddc3-df92efe17a00" | |
}, | |
"source": [ | |
"from transformers import AutoConfig,AutoTokenizer,AutoModelForPreTraining\n", | |
"\n", | |
"modelnm='distilbert-base-cased'\n", | |
"tokenizer=AutoTokenizer.from_pretrained(modelnm)\n", | |
"model=AutoModelForPreTraining.from_pretrained(modelnm)\n", | |
"\n", | |
"#test tokenizer\n", | |
"tokenizer.tokenize('extravagant')" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "c6a73993a49c432ba584ec488247c016", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=411.0, style=ProgressStyle(description_β¦" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "f7721aa245fd40fcbaecf1fbc6855a39", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descriptiβ¦" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "f3ab3e82cd7b4a85b530576a7e26cde2", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=263273408.0, style=ProgressStyle(descriβ¦" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['extra', '##va', '##gant']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 2 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "VOBqfWOn1zBQ", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"### 2. Creating train and evaluation dataset\n", | |
"Quite often, my datasets are in a .txt or .csv. There are 2 ways to create a dataset in huggingface from text files:\n", | |
"\n", | |
"1. **LineByLineTextDataset**: Assumes each line is a document, and tokenizer will only run once on each line, hence documents that are longer than the block size will be truncated.\n", | |
"2. **TextDataset**: Assumes the documents are one big corpus, and splits using block size. There won't be any padding though.\n", | |
"\n", | |
"For this use case, I'll just grab a Amazon Review from https://nijianmo.github.io/amazon/index.html" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "-CfqLPJVYEKS", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 208 | |
}, | |
"outputId": "5e2acf1e-2118-415e-9f10-dca1f04d5601" | |
}, | |
"source": [ | |
"!wget http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION_5.json.gz" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"--2020-09-03 14:41:51-- http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION_5.json.gz\n", | |
"Resolving deepyeti.ucsd.edu (deepyeti.ucsd.edu)... 169.228.63.50\n", | |
"Connecting to deepyeti.ucsd.edu (deepyeti.ucsd.edu)|169.228.63.50|:80... connected.\n", | |
"HTTP request sent, awaiting response... 200 OK\n", | |
"Length: 287013 (280K) [application/octet-stream]\n", | |
"Saving to: βAMAZON_FASHION_5.json.gzβ\n", | |
"\n", | |
"AMAZON_FASHION_5.js 100%[===================>] 280.29K --.-KB/s in 0.1s \n", | |
"\n", | |
"2020-09-03 14:41:51 (2.22 MB/s) - βAMAZON_FASHION_5.json.gzβ saved [287013/287013]\n", | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "kyFpWUyvXMwq", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "cca1794f-bbe0-4b13-f7eb-18a9a2e7bdf7" | |
}, | |
"source": [ | |
"import gzip, json, pandas as pd\n", | |
"\n", | |
"data = []\n", | |
"with gzip.open('AMAZON_FASHION_5.json.gz') as f:\n", | |
" for l in f:\n", | |
" data.append(json.loads(l.strip()))\n", | |
"df = pd.DataFrame.from_dict(data)\n", | |
"\n", | |
"print(len(df))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"3176\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "F_ZejltASXU-", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"df[['reviewText']][:3000].to_csv('train.csv')\n", | |
"df[['reviewText']][3000:].to_csv('val.csv')" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "C3QVwSGWUc4z", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"from transformers import LineByLineTextDataset,DataCollatorForLanguageModeling,TextDataset" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "087O5Kb6M7Bj", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train_dataset = TextDataset(\n", | |
" tokenizer=tokenizer,\n", | |
" file_path='train.csv',\n", | |
" block_size=128\n", | |
")\n", | |
"\n", | |
"val_dataset = TextDataset(\n", | |
" tokenizer=tokenizer,\n", | |
" file_path='val.csv',\n", | |
" block_size=128\n", | |
")\n", | |
"\n", | |
"# LineBYLine\n", | |
"# train_dataset = LineByLineTextDataset(\n", | |
"# tokenizer=tokenizer,\n", | |
"# file_path=\"train.csv\",\n", | |
"# block_size=512\n", | |
"# )\n", | |
"\n", | |
"# val_dataset = LineByLineTextDataset(\n", | |
"# tokenizer=tokenizer,\n", | |
"# file_path=\"val.csv\",\n", | |
"# block_size=512\n", | |
"# )\n", | |
"# data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer,mlm=True,mlm_probability=0.15)\n", | |
"\n", | |
"data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer,mlm=True,mlm_probability=0.15)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Z7U1QxGZ77Zk", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"### 3. Trainer Arguments and Trainer\n", | |
"We initialise the training arguments and also Trainer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_A8OQiZ1UBrG", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 364 | |
}, | |
"outputId": "d583e58a-6bd5-4307-a4bf-ef20f01f15c4" | |
}, | |
"source": [ | |
"# Check that we have a GPU\n", | |
"!nvidia-smi" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Thu Sep 3 14:43:04 2020 \n", | |
"+-----------------------------------------------------------------------------+\n", | |
"| NVIDIA-SMI 450.66 Driver Version: 418.67 CUDA Version: 10.1 |\n", | |
"|-------------------------------+----------------------+----------------------+\n", | |
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", | |
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", | |
"| | | MIG M. |\n", | |
"|===============================+======================+======================|\n", | |
"| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\n", | |
"| N/A 42C P0 25W / 250W | 0MiB / 16280MiB | 0% Default |\n", | |
"| | | ERR! |\n", | |
"+-------------------------------+----------------------+----------------------+\n", | |
" \n", | |
"+-----------------------------------------------------------------------------+\n", | |
"| Processes: |\n", | |
"| GPU GI CI PID Type Process name GPU Memory |\n", | |
"| ID ID Usage |\n", | |
"|=============================================================================|\n", | |
"| No running processes found |\n", | |
"+-----------------------------------------------------------------------------+\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "z424e05WUFrz", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "d913f282-b185-4f87-eeb8-29534e537988" | |
}, | |
"source": [ | |
"import torch\n", | |
"torch.cuda.is_available()" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Gm-zq6m5T8zC", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"### Using Trainer\n", | |
"- For details on training arguments: https://huggingface.co/transformers/main_classes/trainer.html\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "SvTNFlOVi5xA", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"To get the Trainer to compute accuracy for our Masked Language Modelling, we create these 2 functions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "e57i8G6AEXvb", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"def accuracy(input, targs):\n", | |
" \"Computes accuracy with `targs` when `input` is bs * n_classes.\"\n", | |
" n = targs.shape[0]\n", | |
" #input = input.argmax(dim=-1).view(n,-1)\n", | |
" input = input.reshape(n,-1)\n", | |
" targs = targs.reshape(n,-1)\n", | |
" # return (input==targs).float().mean()\n", | |
" return (input==targs).mean()" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "GexxpXt3EX7e", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"def compute_metrics(pred):\n", | |
" labels = pred.label_ids\n", | |
" preds = pred.predictions.argmax(-1)\n", | |
" acc = accuracy(labels, preds)\n", | |
" return {\n", | |
" 'accuracy': acc\n", | |
" }" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "F33spPoZjCfn", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"Start tensorboard writer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "HE4qoiWpOxMK", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"from torch.utils.tensorboard import SummaryWriter\n", | |
"tb = SummaryWriter()" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "wjeKhkrGjJhm", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"Define training arguments and trainer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "BPgFNONpEYAW", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"from transformers import Trainer, TrainingArguments\n", | |
"\n", | |
"training_args = TrainingArguments(\n", | |
" output_dir=\"./model\",\n", | |
" overwrite_output_dir=True,\n", | |
" num_train_epochs=3,\n", | |
" per_device_train_batch_size=8,\n", | |
" per_device_eval_batch_size=8,\n", | |
" save_steps=1000,\n", | |
" save_total_limit=1,\n", | |
" learning_rate=5e-5, #In BERT, fine tuning lrs were in the range of (2e-5 to 5e-5)\n", | |
" do_train=True,\n", | |
" evaluate_during_training=True,\n", | |
" # warmup_steps=1000 #In BERT, pre-training phase had 10k warmup\n", | |
" logging_steps=1,\n", | |
" eval_steps=1,\n", | |
" gradient_accumulation_steps=8, #reduce memory usage while allowing bigger overall batch size. Roberta used this technique to get a 8k batch size.s\n", | |
")\n", | |
"\n", | |
"trainer = Trainer(\n", | |
" model=model,\n", | |
" args=training_args,\n", | |
" data_collator=data_collator,\n", | |
" train_dataset=train_dataset,\n", | |
" eval_dataset=val_dataset,\n", | |
" compute_metrics=compute_metrics,\n", | |
" tb_writer=tb\n", | |
")" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "2R5zX616jN2r", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"Train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "JgBw1Ap8EYFF", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"%%time\n", | |
"trainer.train()" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "SFy2fG0cJ51H", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 766 | |
}, | |
"outputId": "dc59a426-2c89-42c5-aa87-e879f34fa541" | |
}, | |
"source": [ | |
"!tensorboard dev upload --logdir runs" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"2020-09-03 14:45:50.966465: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n", | |
"\n", | |
"***** TensorBoard Uploader *****\n", | |
"\n", | |
"This will upload your TensorBoard logs to https://tensorboard.dev/ from\n", | |
"the following directory:\n", | |
"\n", | |
"runs\n", | |
"\n", | |
"This TensorBoard will be visible to everyone. Do not upload sensitive\n", | |
"data.\n", | |
"\n", | |
"Your use of this service is subject to Google's Terms of Service\n", | |
"<https://policies.google.com/terms> and Privacy Policy\n", | |
"<https://policies.google.com/privacy>, and TensorBoard.dev's Terms of Service\n", | |
"<https://tensorboard.dev/policy/terms/>.\n", | |
"\n", | |
"This notice will not be shown again while you are logged into the uploader.\n", | |
"To log out, run `tensorboard dev auth revoke`.\n", | |
"\n", | |
"Continue? (yes/NO) yes\n", | |
"\n", | |
"Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=373649185512-8v619h5kft38l4456nm2dj4ubeqsrvh6.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email&state=DySudJM7iN3tEjewfPCdKAL8frAksT&prompt=consent&access_type=offline\n", | |
"Enter the authorization code: 4/3gE8vDdu5I96mSAvBzuMa6vE6QTItHBxh_2SIxwJISxTuadWuk4O3MU\n", | |
"\n", | |
"Data for the \"graphs\" plugin is now uploaded to TensorBoard.dev! Note that uploaded data is public. If you do not want to upload data for this plugin, use the \"--plugins\" command line argument.\n", | |
"Data for the \"histograms\" plugin is now uploaded to TensorBoard.dev! Note that uploaded data is public. If you do not want to upload data for this plugin, use the \"--plugins\" command line argument.\n", | |
"Data for the \"hparams\" plugin is now uploaded to TensorBoard.dev! Note that uploaded data is public. If you do not want to upload data for this plugin, use the \"--plugins\" command line argument.\n", | |
"Upload started and will continue reading any new data as it's added\n", | |
"to the logdir. To stop uploading, press Ctrl-C.\n", | |
"\n", | |
"View your TensorBoard live at: https://tensorboard.dev/experiment/DIl7t4EkQym5kGXEXO4qTw/\n", | |
"\n", | |
"\u001b[1m[2020-09-03T14:46:10]\u001b[0m Uploader started.\n", | |
"\u001b[1m[2020-09-03T14:46:10]\u001b[0m Total uploaded: 216 scalars, 3 tensors (18 B), 0 binary objects\n", | |
"\n", | |
"Interrupted. View your TensorBoard at https://tensorboard.dev/experiment/DIl7t4EkQym5kGXEXO4qTw/\n", | |
"Exception ignored in: <bound method Channel.__del__ of <grpc._channel.Channel object at 0x7fc9ed7cd3c8>>\n", | |
"Traceback (most recent call last):\n", | |
" File \"/usr/local/lib/python3.6/dist-packages/grpc/_channel.py\", line 1446, in __del__\n", | |
" def __del__(self):\n", | |
"KeyboardInterrupt\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "_8EMioIuiECl", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"Save" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "2y4E9Y36jQyT", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"trainer.save_model(\"./model\")" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "4l-hhP-GaX_j", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"# Conclusion\n", | |
"We've initialised a transformer model, loaded a dataset and fine-tuned a pre-trained language model for a few epochs and visualised the valiation loss and accuracy in tensorboard." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "CpIunQmZYcg_", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"Next: Explore hyperparameter search\n", | |
"1. https://huggingface.co/transformers/master/main_classes/trainer.html#transformers.Trainer.hyperparameter_search\n", | |
"1. https://discuss.huggingface.co/t/using-hyperparameter-search-in-trainer/785/10" | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment