Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save JonathanLoscalzo/699c27c60754d62bcb86b9f38d84ad32 to your computer and use it in GitHub Desktop.
Save JonathanLoscalzo/699c27c60754d62bcb86b9f38d84ad32 to your computer and use it in GitHub Desktop.
LLM Zoomcamp - Homework 4 - Monitoring (2024).ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyNziNCMKJPodalX7fnBV4WI",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"c0f3a801b82b4738a35e7910ef78b225": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_b04206ccf2ea49c1868df1d6f98dbbda",
"IPY_MODEL_d89fbf7df5f04aab9b86ddc792f441fa",
"IPY_MODEL_28edf06b44c94ffe80e52b9718e6c2a4"
],
"layout": "IPY_MODEL_94db3abe6b7e4aed873516725dfb8b80"
}
},
"b04206ccf2ea49c1868df1d6f98dbbda": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cb026ab34b474ed8adc8f86e2bcf0ebb",
"placeholder": "​",
"style": "IPY_MODEL_e6e6eb3405564874a7b8585731983a37",
"value": "modules.json: 100%"
}
},
"d89fbf7df5f04aab9b86ddc792f441fa": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5bf17ad718c149f181eadb590b941b1a",
"max": 229,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d7f8223e6b284e628e46761bed848876",
"value": 229
}
},
"28edf06b44c94ffe80e52b9718e6c2a4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_42e80791d6164c1e94269f29778e270f",
"placeholder": "​",
"style": "IPY_MODEL_8a66d5c91a8a46368f22d82ce40ada32",
"value": " 229/229 [00:00<00:00, 10.2kB/s]"
}
},
"94db3abe6b7e4aed873516725dfb8b80": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cb026ab34b474ed8adc8f86e2bcf0ebb": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e6e6eb3405564874a7b8585731983a37": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"5bf17ad718c149f181eadb590b941b1a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d7f8223e6b284e628e46761bed848876": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"42e80791d6164c1e94269f29778e270f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8a66d5c91a8a46368f22d82ce40ada32": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"fa41e2c1da61494c951c5199213559ec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_dd3bf8e6af354a7195c41142981e8f72",
"IPY_MODEL_107f6ff80ee8476ab521c9525c7d0e0d",
"IPY_MODEL_4342b253205046659e5b313f6eabee4c"
],
"layout": "IPY_MODEL_c006ae2f560b4d519753de98eae7146c"
}
},
"dd3bf8e6af354a7195c41142981e8f72": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c77a590fbcb14302bf9be40cf1717243",
"placeholder": "​",
"style": "IPY_MODEL_1f637285f07d4a9c912d0b00814ba225",
"value": "config_sentence_transformers.json: 100%"
}
},
"107f6ff80ee8476ab521c9525c7d0e0d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_d7da38968620420491f03c54f8c8cf8c",
"max": 212,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_8927508a01c34ed3ac3c4495c2019bae",
"value": 212
}
},
"4342b253205046659e5b313f6eabee4c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_aa162e400dce4b77887c7748c4c3a87d",
"placeholder": "​",
"style": "IPY_MODEL_72807713587047f983e5960d168d7d7e",
"value": " 212/212 [00:00<00:00, 8.74kB/s]"
}
},
"c006ae2f560b4d519753de98eae7146c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c77a590fbcb14302bf9be40cf1717243": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1f637285f07d4a9c912d0b00814ba225": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d7da38968620420491f03c54f8c8cf8c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8927508a01c34ed3ac3c4495c2019bae": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"aa162e400dce4b77887c7748c4c3a87d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"72807713587047f983e5960d168d7d7e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"35ecdce9c569492e9c934a48d0a0caf5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_7ec6b59044b74aeaa8ff501c3a96fdc6",
"IPY_MODEL_202f6e8e1480402fafc4c08cc94d8934",
"IPY_MODEL_ba4d3ebc614f4773be2e8c4c95240425"
],
"layout": "IPY_MODEL_913332a3671044f29c46406c209aae39"
}
},
"7ec6b59044b74aeaa8ff501c3a96fdc6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_70b1e749c8ed4c2f8e9c106b09bc82bd",
"placeholder": "​",
"style": "IPY_MODEL_d33d476345374d938c20c75b587bbe3b",
"value": "README.md: 100%"
}
},
"202f6e8e1480402fafc4c08cc94d8934": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_aa2d8b958c754ba5b12b86c2cf301d97",
"max": 8714,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_86416940d35a4438b9d5193395a7864b",
"value": 8714
}
},
"ba4d3ebc614f4773be2e8c4c95240425": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3da22f9b99924381821d01ce4cf4c691",
"placeholder": "​",
"style": "IPY_MODEL_25a9c0c492f4455f8607104539454cc0",
"value": " 8.71k/8.71k [00:00<00:00, 297kB/s]"
}
},
"913332a3671044f29c46406c209aae39": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"70b1e749c8ed4c2f8e9c106b09bc82bd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d33d476345374d938c20c75b587bbe3b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"aa2d8b958c754ba5b12b86c2cf301d97": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"86416940d35a4438b9d5193395a7864b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"3da22f9b99924381821d01ce4cf4c691": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"25a9c0c492f4455f8607104539454cc0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9af2ca42d80e41c7b69dc379f3ccb849": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_46d3678e5b5943779b7c860e20f6caba",
"IPY_MODEL_d16b5544b40a4f298c46f8a6480e4189",
"IPY_MODEL_0cdfdc46a878466cb5623053633675f7"
],
"layout": "IPY_MODEL_435f4205137e49598efae9f7ca4afce0"
}
},
"46d3678e5b5943779b7c860e20f6caba": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_4bc4f508ebe441e7ae6cd2fcc585f6a1",
"placeholder": "​",
"style": "IPY_MODEL_62afcfd4468e4df3a636a07ca405e5d4",
"value": "sentence_bert_config.json: 100%"
}
},
"d16b5544b40a4f298c46f8a6480e4189": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_bee36bc350a34fc3b9ed2d6b66aa0af2",
"max": 53,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_f3cd69e1733740b0a8448c9704c0d68b",
"value": 53
}
},
"0cdfdc46a878466cb5623053633675f7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_05081bdbde684ecdadb763665ac213e8",
"placeholder": "​",
"style": "IPY_MODEL_3b14b3cde0ce40b4850aa76c17f77e47",
"value": " 53.0/53.0 [00:00<00:00, 2.85kB/s]"
}
},
"435f4205137e49598efae9f7ca4afce0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4bc4f508ebe441e7ae6cd2fcc585f6a1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"62afcfd4468e4df3a636a07ca405e5d4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"bee36bc350a34fc3b9ed2d6b66aa0af2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f3cd69e1733740b0a8448c9704c0d68b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"05081bdbde684ecdadb763665ac213e8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3b14b3cde0ce40b4850aa76c17f77e47": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"939c1f05bce54aa7bd313ac49e317e3f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_7a8063cb38ac4df9b5fcfc73c2e94134",
"IPY_MODEL_cec88efd43594c03b805feaa860d3d3a",
"IPY_MODEL_8163a0aaafb143b6a76e1e0af7a7465e"
],
"layout": "IPY_MODEL_adea853ed5544b93870b151d3191174f"
}
},
"7a8063cb38ac4df9b5fcfc73c2e94134": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9d1bb40b71b64c6781b277f559461c39",
"placeholder": "​",
"style": "IPY_MODEL_910f039aeea44a1e9874a445a029ef91",
"value": "config.json: 100%"
}
},
"cec88efd43594c03b805feaa860d3d3a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_fce7bbd09a414475b49f8888d2fdf824",
"max": 571,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_89251be090cd4e1ea96afaa5a3e52acc",
"value": 571
}
},
"8163a0aaafb143b6a76e1e0af7a7465e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c84ecaec216f4484807823b57a528029",
"placeholder": "​",
"style": "IPY_MODEL_45d026fcb41c48bcbd1ed3e1a608b9ca",
"value": " 571/571 [00:00<00:00, 47.1kB/s]"
}
},
"adea853ed5544b93870b151d3191174f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9d1bb40b71b64c6781b277f559461c39": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"910f039aeea44a1e9874a445a029ef91": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"fce7bbd09a414475b49f8888d2fdf824": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"89251be090cd4e1ea96afaa5a3e52acc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"c84ecaec216f4484807823b57a528029": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"45d026fcb41c48bcbd1ed3e1a608b9ca": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"871e1bf7b242477387b92f9acdc4aa27": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_8323bc252b844c6fa6e42d9b5014f1b2",
"IPY_MODEL_bf741bb2b41e4af0b63f9b424a1c12ce",
"IPY_MODEL_29df7252a56a4bea948cbb1afe7f6120"
],
"layout": "IPY_MODEL_9ec1b4d087c74fb0b44c0cd4c8ce70ae"
}
},
"8323bc252b844c6fa6e42d9b5014f1b2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_008aca64d53649e9a5e6824a4a24749d",
"placeholder": "​",
"style": "IPY_MODEL_3853431c60564648b4873a73b1e43e95",
"value": "model.safetensors: 100%"
}
},
"bf741bb2b41e4af0b63f9b424a1c12ce": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_06303f6bae914d4db2995956cd845eae",
"max": 437971872,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d17e44c5f4eb48c195de47daa217cc73",
"value": 437971872
}
},
"29df7252a56a4bea948cbb1afe7f6120": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3a0a7cfd1f914d7faaf77ae1b22f0e09",
"placeholder": "​",
"style": "IPY_MODEL_caf57527271a41a18a8282c41a07d6db",
"value": " 438M/438M [00:02<00:00, 209MB/s]"
}
},
"9ec1b4d087c74fb0b44c0cd4c8ce70ae": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"008aca64d53649e9a5e6824a4a24749d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3853431c60564648b4873a73b1e43e95": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"06303f6bae914d4db2995956cd845eae": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d17e44c5f4eb48c195de47daa217cc73": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"3a0a7cfd1f914d7faaf77ae1b22f0e09": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"caf57527271a41a18a8282c41a07d6db": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9f7f0368d43c49bb8912a25e796f895c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_75e30dded9b1497eac623edac507bc58",
"IPY_MODEL_1870fb6d6c284ab9af885fe7eb168303",
"IPY_MODEL_8a960e3bc12047c594d04e2d6c84b9c5"
],
"layout": "IPY_MODEL_76867fb7a2b54dfeaca7dfb3fca19e6b"
}
},
"75e30dded9b1497eac623edac507bc58": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_90443620f2864db9a793120b529c3e79",
"placeholder": "​",
"style": "IPY_MODEL_6dc0e36f3ae147beaeae7fd2e07c53b1",
"value": "tokenizer_config.json: 100%"
}
},
"1870fb6d6c284ab9af885fe7eb168303": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ca578e600b6a4a78bb220a4be05c59a5",
"max": 363,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_527e0625c9b449d4b4e1872252bd82eb",
"value": 363
}
},
"8a960e3bc12047c594d04e2d6c84b9c5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c2f10f9673d04abc955fd5a9b59dae41",
"placeholder": "​",
"style": "IPY_MODEL_34d0c8aa6311451da0df212302b33e7a",
"value": " 363/363 [00:00<00:00, 27.1kB/s]"
}
},
"76867fb7a2b54dfeaca7dfb3fca19e6b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"90443620f2864db9a793120b529c3e79": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6dc0e36f3ae147beaeae7fd2e07c53b1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ca578e600b6a4a78bb220a4be05c59a5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"527e0625c9b449d4b4e1872252bd82eb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"c2f10f9673d04abc955fd5a9b59dae41": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"34d0c8aa6311451da0df212302b33e7a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9400d3071cc2419fbcda51b47e03e7d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_0e7b573f84d24d70a44e4a53b8bebdc6",
"IPY_MODEL_f969f571bd1e4805a0703e299c5b03c7",
"IPY_MODEL_2133c7b5225e43f68dddcc85a5a294f0"
],
"layout": "IPY_MODEL_f556fa51ed754554850a2c2fba34f5cf"
}
},
"0e7b573f84d24d70a44e4a53b8bebdc6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6bc8b543e46b4471b8ddbc784721620e",
"placeholder": "​",
"style": "IPY_MODEL_a8179b2ef79945dfba99a92f4cb9426c",
"value": "vocab.txt: 100%"
}
},
"f969f571bd1e4805a0703e299c5b03c7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3e59e1bf45604333b39c1fee42dd3a18",
"max": 231536,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_23dafb030fb34872bac6dc803218b1b5",
"value": 231536
}
},
"2133c7b5225e43f68dddcc85a5a294f0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2e8719a464244112bac95db4780ac202",
"placeholder": "​",
"style": "IPY_MODEL_5296d0e2e4e349b4bf372d0e1ecc2a87",
"value": " 232k/232k [00:00<00:00, 3.51MB/s]"
}
},
"f556fa51ed754554850a2c2fba34f5cf": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6bc8b543e46b4471b8ddbc784721620e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a8179b2ef79945dfba99a92f4cb9426c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3e59e1bf45604333b39c1fee42dd3a18": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"23dafb030fb34872bac6dc803218b1b5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"2e8719a464244112bac95db4780ac202": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5296d0e2e4e349b4bf372d0e1ecc2a87": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"4e9833fd35c546d8aea08153afe6af42": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_5d9daf3b33e948b38202a5fd059a0202",
"IPY_MODEL_0b8627d986294d2aa88d6880ad03b9df",
"IPY_MODEL_935570cf1d584d988e3895a6927f477f"
],
"layout": "IPY_MODEL_f99217d08d2e4639ad0c3bab7cd55ba4"
}
},
"5d9daf3b33e948b38202a5fd059a0202": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cc2b16efdbba489e95fb9039f64ca839",
"placeholder": "​",
"style": "IPY_MODEL_a87367fcae634378b17b98dcd101cd22",
"value": "tokenizer.json: 100%"
}
},
"0b8627d986294d2aa88d6880ad03b9df": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9ff044b7d1a2464f8c16aede36483ef8",
"max": 466021,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_0c14b4c3da6a4f2d9ab5eb56c169d0ee",
"value": 466021
}
},
"935570cf1d584d988e3895a6927f477f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_79fdb466c0324812b598b2023aed967b",
"placeholder": "​",
"style": "IPY_MODEL_885653b0bcfa47b0bc4d8b2a2bab7a38",
"value": " 466k/466k [00:00<00:00, 6.32MB/s]"
}
},
"f99217d08d2e4639ad0c3bab7cd55ba4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cc2b16efdbba489e95fb9039f64ca839": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a87367fcae634378b17b98dcd101cd22": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9ff044b7d1a2464f8c16aede36483ef8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0c14b4c3da6a4f2d9ab5eb56c169d0ee": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"79fdb466c0324812b598b2023aed967b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"885653b0bcfa47b0bc4d8b2a2bab7a38": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3698d1415da344459a343235b69e940a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_42db2b0c0c0545c7bc1acbe1cef7958d",
"IPY_MODEL_f02b4e515bd64113a8fbe72200965b87",
"IPY_MODEL_25b96ea9bf75445da650b9a2390f05b4"
],
"layout": "IPY_MODEL_5d8e5a5c176c406c8d18c2c9703c9d1d"
}
},
"42db2b0c0c0545c7bc1acbe1cef7958d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_701de0e681de4df185dfd074ada39929",
"placeholder": "​",
"style": "IPY_MODEL_8bc3fd6e32f44a078e24d45c7cb17dc7",
"value": "special_tokens_map.json: 100%"
}
},
"f02b4e515bd64113a8fbe72200965b87": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f7c3ba1086dd4627972302ba8faac68d",
"max": 239,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_9cab2808b38b4a599c09cc903e90af6a",
"value": 239
}
},
"25b96ea9bf75445da650b9a2390f05b4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_28d98921714244e8905cd66dfe65a882",
"placeholder": "​",
"style": "IPY_MODEL_2c2aac90798547e0a2c82ed318fdba32",
"value": " 239/239 [00:00<00:00, 16.4kB/s]"
}
},
"5d8e5a5c176c406c8d18c2c9703c9d1d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"701de0e681de4df185dfd074ada39929": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8bc3fd6e32f44a078e24d45c7cb17dc7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f7c3ba1086dd4627972302ba8faac68d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9cab2808b38b4a599c09cc903e90af6a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"28d98921714244e8905cd66dfe65a882": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2c2aac90798547e0a2c82ed318fdba32": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"895765feec51411a9013846a50d598cc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_76466a1dcb39481d8b3291293e0515c0",
"IPY_MODEL_93da8d8c37824f168e24db215e7db002",
"IPY_MODEL_dbb9170bc9f344b6a5dafd5876075231"
],
"layout": "IPY_MODEL_0b2b93d3dc714fbf8d4f5b25e8eeb475"
}
},
"76466a1dcb39481d8b3291293e0515c0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_4eddb51892de4baea2cc1c4863343d42",
"placeholder": "​",
"style": "IPY_MODEL_9373552abfd141c6b1634ac35564a25e",
"value": "1_Pooling/config.json: 100%"
}
},
"93da8d8c37824f168e24db215e7db002": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0b1adc8d2e6d4334bb67d1ae7f9c34bb",
"max": 190,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_f4fd37bc6ef0437d92bf341122e013d2",
"value": 190
}
},
"dbb9170bc9f344b6a5dafd5876075231": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_20b14da1bbd84406abbb03e302ec8253",
"placeholder": "​",
"style": "IPY_MODEL_db9051b3e1bb44bda4b06c22222ce075",
"value": " 190/190 [00:00<00:00, 6.97kB/s]"
}
},
"0b2b93d3dc714fbf8d4f5b25e8eeb475": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4eddb51892de4baea2cc1c4863343d42": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9373552abfd141c6b1634ac35564a25e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"0b1adc8d2e6d4334bb67d1ae7f9c34bb": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f4fd37bc6ef0437d92bf341122e013d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"20b14da1bbd84406abbb03e302ec8253": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"db9051b3e1bb44bda4b06c22222ce075": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/JonathanLoscalzo/699c27c60754d62bcb86b9f38d84ad32/llm-zoomcamp-homework-4-monitoring-2024.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xcYas3G5_NFk",
"outputId": "83f0ad00-9de4-4bc3-80de-c76c9df0caa8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2024-08-19 00:17:20-- https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv?raw=1\n",
"Resolving github.com (github.com)... 140.82.114.3\n",
"Connecting to github.com (github.com)|140.82.114.3|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://github.com/DataTalksClub/llm-zoomcamp/raw/main/04-monitoring/data/results-gpt4o-mini.csv [following]\n",
"--2024-08-19 00:17:21-- https://github.com/DataTalksClub/llm-zoomcamp/raw/main/04-monitoring/data/results-gpt4o-mini.csv\n",
"Reusing existing connection to github.com:443.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/main/04-monitoring/data/results-gpt4o-mini.csv [following]\n",
"--2024-08-19 00:17:21-- https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/main/04-monitoring/data/results-gpt4o-mini.csv\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 1705231 (1.6M) [text/plain]\n",
"Saving to: ‘results-gpt4o-mini.csv’\n",
"\n",
"results-gpt4o-mini. 100%[===================>] 1.63M --.-KB/s in 0.05s \n",
"\n",
"2024-08-19 00:17:22 (32.5 MB/s) - ‘results-gpt4o-mini.csv’ saved [1705231/1705231]\n",
"\n"
]
}
],
"source": [
"!wget https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv?raw=1 -O results-gpt4o-mini.csv"
]
},
{
"cell_type": "code",
"source": [
"github_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv'"
],
"metadata": {
"id": "NKs9xdJF_fr7"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"\n",
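"# Load the local copy saved by the wget cell above; the commented-out line below builds the direct GitHub download URL instead.\n",
"# url = f'{github_url}?raw=1'\n",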
"df = pd.read_csv('results-gpt4o-mini.csv')"
],
"metadata": {
"id": "kEx7ttwl_Y3I"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 423
},
"id": "7xxWJ0Nd_nCa",
"outputId": "b345590e-c7de-4a3a-aef9-2ecbbe4df4d0"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" answer_llm \\\n",
"0 You can sign up for the course by visiting the... \n",
"1 You can sign up using the link provided in the... \n",
"2 Yes, there is an FAQ for the Machine Learning ... \n",
"3 The context does not provide any specific info... \n",
"4 To structure your questions and answers for th... \n",
"... ... \n",
"1825 Some suggested titles for listing the Machine ... \n",
"1826 It is best advised that you do not list the Ma... \n",
"1827 You can incorporate your Machine Learning Zoom... \n",
"1828 The advice on including a project link in a CV... \n",
"1829 The suggestion to showcase progress through Li... \n",
"\n",
" answer_orig document \\\n",
"0 Machine Learning Zoomcamp FAQ\\nThe purpose of ... 0227b872 \n",
"1 Machine Learning Zoomcamp FAQ\\nThe purpose of ... 0227b872 \n",
"2 Machine Learning Zoomcamp FAQ\\nThe purpose of ... 0227b872 \n",
"3 Machine Learning Zoomcamp FAQ\\nThe purpose of ... 0227b872 \n",
"4 Machine Learning Zoomcamp FAQ\\nThe purpose of ... 0227b872 \n",
"... ... ... \n",
"1825 I’ve seen LinkedIn users list DataTalksClub as... c6a22665 \n",
"1826 I’ve seen LinkedIn users list DataTalksClub as... c6a22665 \n",
"1827 I’ve seen LinkedIn users list DataTalksClub as... c6a22665 \n",
"1828 I’ve seen LinkedIn users list DataTalksClub as... c6a22665 \n",
"1829 I’ve seen LinkedIn users list DataTalksClub as... c6a22665 \n",
"\n",
" question \\\n",
"0 Where can I sign up for the course? \n",
"1 Can you provide a link to sign up? \n",
"2 Is there an FAQ for this Machine Learning course? \n",
"3 Does this course have a GitHub repository for ... \n",
"4 How can I structure my questions and answers f... \n",
"... ... \n",
"1825 What are some suggested titles for listing the... \n",
"1826 Should I list the Machine Learning Zoomcamp ex... \n",
"1827 In which LinkedIn sections can I incorporate m... \n",
"1828 Who gave advice on including a project link in... \n",
"1829 Who suggested showcasing progress through Link... \n",
"\n",
" course \n",
"0 machine-learning-zoomcamp \n",
"1 machine-learning-zoomcamp \n",
"2 machine-learning-zoomcamp \n",
"3 machine-learning-zoomcamp \n",
"4 machine-learning-zoomcamp \n",
"... ... \n",
"1825 machine-learning-zoomcamp \n",
"1826 machine-learning-zoomcamp \n",
"1827 machine-learning-zoomcamp \n",
"1828 machine-learning-zoomcamp \n",
"1829 machine-learning-zoomcamp \n",
"\n",
"[1830 rows x 5 columns]"
],
"text/html": [
"\n",
" <div id=\"df-4b479027-dca8-4b6e-89af-9372481feb7a\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>answer_llm</th>\n",
" <th>answer_orig</th>\n",
" <th>document</th>\n",
" <th>question</th>\n",
" <th>course</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>You can sign up for the course by visiting the...</td>\n",
" <td>Machine Learning Zoomcamp FAQ\\nThe purpose of ...</td>\n",
" <td>0227b872</td>\n",
" <td>Where can I sign up for the course?</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>You can sign up using the link provided in the...</td>\n",
" <td>Machine Learning Zoomcamp FAQ\\nThe purpose of ...</td>\n",
" <td>0227b872</td>\n",
" <td>Can you provide a link to sign up?</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Yes, there is an FAQ for the Machine Learning ...</td>\n",
" <td>Machine Learning Zoomcamp FAQ\\nThe purpose of ...</td>\n",
" <td>0227b872</td>\n",
" <td>Is there an FAQ for this Machine Learning course?</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>The context does not provide any specific info...</td>\n",
" <td>Machine Learning Zoomcamp FAQ\\nThe purpose of ...</td>\n",
" <td>0227b872</td>\n",
" <td>Does this course have a GitHub repository for ...</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>To structure your questions and answers for th...</td>\n",
" <td>Machine Learning Zoomcamp FAQ\\nThe purpose of ...</td>\n",
" <td>0227b872</td>\n",
" <td>How can I structure my questions and answers f...</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1825</th>\n",
" <td>Some suggested titles for listing the Machine ...</td>\n",
" <td>I’ve seen LinkedIn users list DataTalksClub as...</td>\n",
" <td>c6a22665</td>\n",
" <td>What are some suggested titles for listing the...</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826</th>\n",
" <td>It is best advised that you do not list the Ma...</td>\n",
" <td>I’ve seen LinkedIn users list DataTalksClub as...</td>\n",
" <td>c6a22665</td>\n",
" <td>Should I list the Machine Learning Zoomcamp ex...</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1827</th>\n",
" <td>You can incorporate your Machine Learning Zoom...</td>\n",
" <td>I’ve seen LinkedIn users list DataTalksClub as...</td>\n",
" <td>c6a22665</td>\n",
" <td>In which LinkedIn sections can I incorporate m...</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1828</th>\n",
" <td>The advice on including a project link in a CV...</td>\n",
" <td>I’ve seen LinkedIn users list DataTalksClub as...</td>\n",
" <td>c6a22665</td>\n",
" <td>Who gave advice on including a project link in...</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1829</th>\n",
" <td>The suggestion to showcase progress through Li...</td>\n",
" <td>I’ve seen LinkedIn users list DataTalksClub as...</td>\n",
" <td>c6a22665</td>\n",
" <td>Who suggested showcasing progress through Link...</td>\n",
" <td>machine-learning-zoomcamp</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1830 rows × 5 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4b479027-dca8-4b6e-89af-9372481feb7a')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-4b479027-dca8-4b6e-89af-9372481feb7a button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-4b479027-dca8-4b6e-89af-9372481feb7a');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-baedbeef-071f-450c-b79e-61be3cf456ae\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-baedbeef-071f-450c-b79e-61be3cf456ae')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-baedbeef-071f-450c-b79e-61be3cf456ae button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" <div id=\"id_6a3310d8-67d2-4d86-be0b-5aa068817726\">\n",
" <style>\n",
" .colab-df-generate {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-generate:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
" <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n",
" title=\"Generate code using this dataframe.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
" </svg>\n",
" </button>\n",
" <script>\n",
" (() => {\n",
" const buttonEl =\n",
" document.querySelector('#id_6a3310d8-67d2-4d86-be0b-5aa068817726 button.colab-df-generate');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" buttonEl.onclick = () => {\n",
" google.colab.notebook.generateWithVariable('df');\n",
" }\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 1830,\n \"fields\": [\n {\n \"column\": \"answer_llm\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1830,\n \"samples\": [\n \"To format a number to 3 decimal places using f-strings, you can use the following syntax:\\n\\n```python\\nprint(f'Average mark for the Homework is {avg:.3f}')\\n```\\n\\nThis will display the value of `avg` rounded to 3 decimal places.\",\n \"To avoid syntax errors instead of using `aws ecr get-login --no-include-email`, you should use the command: \\n\\n```bash\\naws ecr get-login-password\\n```\\n\\nYou can simplify the login process with this command:\\n\\n```bash\\nexport PASSWORD=`aws ecr get-login-password`\\ndocker login -u AWS -p $PASSWORD <ACCOUNT_NUMBER>.dkr.ecr.<REGION>.amazonaws.com/clothing-tflite-images\\n``` \\n\\nMake sure to replace `<ACCOUNT_NUMBER>` and `<REGION>` with your actual values.\",\n \"In the Projects section, a problem title provides the following information: problem description, solution description, and optionally, the name of the person who added the information.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer_orig\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 372,\n \"samples\": [\n \"Yes. You only need to review peers when you submit your project.\\nConfirmed on Slack by Alexey Grigorev (added by Rileen Sinha)\",\n \"In Kaggle, when you are trying to !wget a dataset from github (or any other public repository/location), you get the following error:\\nGetting this error while trying to import data- !wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n--2022-09-17 16:55:24-- https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... 
failed: Temporary failure in name resolution.\\nwget: unable to resolve host address 'raw.githubusercontent.com'\\nSolution:\\nIn your Kaggle notebook settings, turn on the Internet for your session. It's on the settings panel, on the right hand side of the Kaggle screen. You'll be asked to verify your phone number so Kaggle knows you are not a bot.\",\n \"The course is available in the self-paced mode too, so you can go through the materials at any time. But if you want to do it as a cohort with other students, the next iterations will happen in September 2023, September 2024 (and potentially other Septembers as well).\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 374,\n \"samples\": [\n \"1fd83eb9\",\n \"fc2e0a61\",\n \"636f55d5\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1801,\n \"samples\": [\n \"Which tool is recommended for managing virtual environments when building Docker images for machine learning models?\",\n \"How can I set up a Conda environment on my computer?\",\n \"What code should I run in Jupyter to see the version of an installed Python package?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"course\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"machine-learning-zoomcamp\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"df = df.iloc[:300]\n"
],
"metadata": {
"id": "LWu9bBhf_vcV"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!pip install sentence_transformers"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CnhaXzqA_639",
"outputId": "e90f0382-9f15-4c3c-ce87-1da45fe7766c"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting sentence_transformers\n",
" Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)\n",
"Requirement already satisfied: transformers<5.0.0,>=4.34.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.42.4)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.66.5)\n",
"Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (2.3.1+cu121)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.26.4)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.3.2)\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.13.1)\n",
"Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.23.5)\n",
"Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (9.4.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence_transformers) (3.15.4)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence_transformers) (2024.6.1)\n",
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence_transformers) (24.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence_transformers) (6.0.2)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence_transformers) (2.32.3)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence_transformers) (4.12.2)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence_transformers) (1.13.1)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence_transformers) (3.3)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence_transformers) (3.1.4)\n",
"Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
"Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
"Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
"Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
"Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
"Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
"Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
"Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
"Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
"Collecting nvidia-nccl-cu12==2.20.5 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\n",
"Collecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.7 kB)\n",
"Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence_transformers) (2.3.1)\n",
"Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence_transformers)\n",
" Using cached nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (2024.5.15)\n",
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (0.4.4)\n",
"Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (0.19.1)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (3.5.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11.0->sentence_transformers) (2.1.5)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (2024.7.4)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence_transformers) (1.3.0)\n",
"Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hUsing cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
"Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
"Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
"Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
"Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
"Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
"Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
"Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
"Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
"Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
"Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
"Using cached nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl (19.7 MB)\n",
"Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, sentence_transformers\n",
"Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.6.20 nvidia-nvtx-cu12-12.1.105 sentence_transformers-3.0.1\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"model_name = \"multi-qa-mpnet-base-dot-v1\"\n",
"from sentence_transformers import SentenceTransformer\n",
"embedding_model = SentenceTransformer(model_name)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 528,
"referenced_widgets": [
"c0f3a801b82b4738a35e7910ef78b225",
"b04206ccf2ea49c1868df1d6f98dbbda",
"d89fbf7df5f04aab9b86ddc792f441fa",
"28edf06b44c94ffe80e52b9718e6c2a4",
"94db3abe6b7e4aed873516725dfb8b80",
"cb026ab34b474ed8adc8f86e2bcf0ebb",
"e6e6eb3405564874a7b8585731983a37",
"5bf17ad718c149f181eadb590b941b1a",
"d7f8223e6b284e628e46761bed848876",
"42e80791d6164c1e94269f29778e270f",
"8a66d5c91a8a46368f22d82ce40ada32",
"fa41e2c1da61494c951c5199213559ec",
"dd3bf8e6af354a7195c41142981e8f72",
"107f6ff80ee8476ab521c9525c7d0e0d",
"4342b253205046659e5b313f6eabee4c",
"c006ae2f560b4d519753de98eae7146c",
"c77a590fbcb14302bf9be40cf1717243",
"1f637285f07d4a9c912d0b00814ba225",
"d7da38968620420491f03c54f8c8cf8c",
"8927508a01c34ed3ac3c4495c2019bae",
"aa162e400dce4b77887c7748c4c3a87d",
"72807713587047f983e5960d168d7d7e",
"35ecdce9c569492e9c934a48d0a0caf5",
"7ec6b59044b74aeaa8ff501c3a96fdc6",
"202f6e8e1480402fafc4c08cc94d8934",
"ba4d3ebc614f4773be2e8c4c95240425",
"913332a3671044f29c46406c209aae39",
"70b1e749c8ed4c2f8e9c106b09bc82bd",
"d33d476345374d938c20c75b587bbe3b",
"aa2d8b958c754ba5b12b86c2cf301d97",
"86416940d35a4438b9d5193395a7864b",
"3da22f9b99924381821d01ce4cf4c691",
"25a9c0c492f4455f8607104539454cc0",
"9af2ca42d80e41c7b69dc379f3ccb849",
"46d3678e5b5943779b7c860e20f6caba",
"d16b5544b40a4f298c46f8a6480e4189",
"0cdfdc46a878466cb5623053633675f7",
"435f4205137e49598efae9f7ca4afce0",
"4bc4f508ebe441e7ae6cd2fcc585f6a1",
"62afcfd4468e4df3a636a07ca405e5d4",
"bee36bc350a34fc3b9ed2d6b66aa0af2",
"f3cd69e1733740b0a8448c9704c0d68b",
"05081bdbde684ecdadb763665ac213e8",
"3b14b3cde0ce40b4850aa76c17f77e47",
"939c1f05bce54aa7bd313ac49e317e3f",
"7a8063cb38ac4df9b5fcfc73c2e94134",
"cec88efd43594c03b805feaa860d3d3a",
"8163a0aaafb143b6a76e1e0af7a7465e",
"adea853ed5544b93870b151d3191174f",
"9d1bb40b71b64c6781b277f559461c39",
"910f039aeea44a1e9874a445a029ef91",
"fce7bbd09a414475b49f8888d2fdf824",
"89251be090cd4e1ea96afaa5a3e52acc",
"c84ecaec216f4484807823b57a528029",
"45d026fcb41c48bcbd1ed3e1a608b9ca",
"871e1bf7b242477387b92f9acdc4aa27",
"8323bc252b844c6fa6e42d9b5014f1b2",
"bf741bb2b41e4af0b63f9b424a1c12ce",
"29df7252a56a4bea948cbb1afe7f6120",
"9ec1b4d087c74fb0b44c0cd4c8ce70ae",
"008aca64d53649e9a5e6824a4a24749d",
"3853431c60564648b4873a73b1e43e95",
"06303f6bae914d4db2995956cd845eae",
"d17e44c5f4eb48c195de47daa217cc73",
"3a0a7cfd1f914d7faaf77ae1b22f0e09",
"caf57527271a41a18a8282c41a07d6db",
"9f7f0368d43c49bb8912a25e796f895c",
"75e30dded9b1497eac623edac507bc58",
"1870fb6d6c284ab9af885fe7eb168303",
"8a960e3bc12047c594d04e2d6c84b9c5",
"76867fb7a2b54dfeaca7dfb3fca19e6b",
"90443620f2864db9a793120b529c3e79",
"6dc0e36f3ae147beaeae7fd2e07c53b1",
"ca578e600b6a4a78bb220a4be05c59a5",
"527e0625c9b449d4b4e1872252bd82eb",
"c2f10f9673d04abc955fd5a9b59dae41",
"34d0c8aa6311451da0df212302b33e7a",
"9400d3071cc2419fbcda51b47e03e7d2",
"0e7b573f84d24d70a44e4a53b8bebdc6",
"f969f571bd1e4805a0703e299c5b03c7",
"2133c7b5225e43f68dddcc85a5a294f0",
"f556fa51ed754554850a2c2fba34f5cf",
"6bc8b543e46b4471b8ddbc784721620e",
"a8179b2ef79945dfba99a92f4cb9426c",
"3e59e1bf45604333b39c1fee42dd3a18",
"23dafb030fb34872bac6dc803218b1b5",
"2e8719a464244112bac95db4780ac202",
"5296d0e2e4e349b4bf372d0e1ecc2a87",
"4e9833fd35c546d8aea08153afe6af42",
"5d9daf3b33e948b38202a5fd059a0202",
"0b8627d986294d2aa88d6880ad03b9df",
"935570cf1d584d988e3895a6927f477f",
"f99217d08d2e4639ad0c3bab7cd55ba4",
"cc2b16efdbba489e95fb9039f64ca839",
"a87367fcae634378b17b98dcd101cd22",
"9ff044b7d1a2464f8c16aede36483ef8",
"0c14b4c3da6a4f2d9ab5eb56c169d0ee",
"79fdb466c0324812b598b2023aed967b",
"885653b0bcfa47b0bc4d8b2a2bab7a38",
"3698d1415da344459a343235b69e940a",
"42db2b0c0c0545c7bc1acbe1cef7958d",
"f02b4e515bd64113a8fbe72200965b87",
"25b96ea9bf75445da650b9a2390f05b4",
"5d8e5a5c176c406c8d18c2c9703c9d1d",
"701de0e681de4df185dfd074ada39929",
"8bc3fd6e32f44a078e24d45c7cb17dc7",
"f7c3ba1086dd4627972302ba8faac68d",
"9cab2808b38b4a599c09cc903e90af6a",
"28d98921714244e8905cd66dfe65a882",
"2c2aac90798547e0a2c82ed318fdba32",
"895765feec51411a9013846a50d598cc",
"76466a1dcb39481d8b3291293e0515c0",
"93da8d8c37824f168e24db215e7db002",
"dbb9170bc9f344b6a5dafd5876075231",
"0b2b93d3dc714fbf8d4f5b25e8eeb475",
"4eddb51892de4baea2cc1c4863343d42",
"9373552abfd141c6b1634ac35564a25e",
"0b1adc8d2e6d4334bb67d1ae7f9c34bb",
"f4fd37bc6ef0437d92bf341122e013d2",
"20b14da1bbd84406abbb03e302ec8253",
"db9051b3e1bb44bda4b06c22222ce075"
]
},
"id": "P6xmnrjM_zpn",
"outputId": "3e70d011-3cdc-4d28-de8b-e1d5957bb77f"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
" from tqdm.autonotebook import tqdm, trange\n",
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
"To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
"You will be able to reuse this secret in all of your notebooks.\n",
"Please note that authentication is recommended but still optional to access public models or datasets.\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"modules.json: 0%| | 0.00/229 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "c0f3a801b82b4738a35e7910ef78b225"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"config_sentence_transformers.json: 0%| | 0.00/212 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "fa41e2c1da61494c951c5199213559ec"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"README.md: 0%| | 0.00/8.71k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "35ecdce9c569492e9c934a48d0a0caf5"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"sentence_bert_config.json: 0%| | 0.00/53.0 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9af2ca42d80e41c7b69dc379f3ccb849"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"config.json: 0%| | 0.00/571 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "939c1f05bce54aa7bd313ac49e317e3f"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"model.safetensors: 0%| | 0.00/438M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "871e1bf7b242477387b92f9acdc4aa27"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/363 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9f7f0368d43c49bb8912a25e796f895c"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9400d3071cc2419fbcda51b47e03e7d2"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "4e9833fd35c546d8aea08153afe6af42"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/239 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "3698d1415da344459a343235b69e940a"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"1_Pooling/config.json: 0%| | 0.00/190 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "895765feec51411a9013846a50d598cc"
}
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": [
"## Q1. Getting the embeddings model\n",
"\n",
"Now, get the embeddings model `multi-qa-mpnet-base-dot-v1` from\n",
"[the Sentence Transformer library](https://www.sbert.net/docs/sentence_transformer/pretrained_models.html#model-overview)\n",
"\n",
"> Note: this is not the same model as in HW3\n",
"\n",
"```bash\n",
"from sentence_transformers import SentenceTransformer\n",
"embedding_model = SentenceTransformer(model_name)\n",
"```\n",
"\n",
"Create the embeddings for the first LLM answer:\n",
"\n",
"```python\n",
"answer_llm = df.iloc[0].answer_llm\n",
"```\n",
"\n",
"What's the first value of the resulting vector?\n",
"\n",
"* -0.42\n",
"* -0.22\n",
"* -0.02\n",
"* 0.21"
],
"metadata": {
"id": "LnsV-zfTCKqT"
}
},
{
"cell_type": "code",
"source": [
"# Create the embeddings for the first LLM answer:\n",
"answer_llm = df.iloc[0].answer_llm\n",
"# Q1: What's the first value of the resulting vector?-0.42, -0.22, -0.02, 0.21\n",
"f\"What's the first value of the resulting vector? R1: {embedding_model.encode(answer_llm)[0]}\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "pGb-ZUY9_51c",
"outputId": "21c9c1fe-a6d1-4a33-ccbc-a200676142c3"
},
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"\"What's the first value of the resulting vector? R1: -0.4224466383457184\""
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 15
}
]
},
{
"cell_type": "markdown",
"source": [
"## Q2. Computing the dot product\n",
"\n",
"\n",
"Now for each answer pair, let's create embeddings and compute dot product between them\n",
"\n",
"We will put the results (scores) into the `evaluations` list\n",
"\n",
"What's the 75% percentile of the score?\n",
"\n",
"* 21.67\n",
"* 31.67\n",
"* 41.67\n",
"* 51.67"
],
"metadata": {
"id": "TQQWxjUpApKh"
}
},
{
"cell_type": "code",
"source": [
"df.columns"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "p4VyHBl8AIaw",
"outputId": "0585592c-d2a8-4155-947a-6020abe97e00"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Index(['answer_llm', 'answer_orig', 'document', 'question', 'course'], dtype='object')"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"evaluations = []\n",
"\n",
"def compute_similarity(model, record: dict):\n",
" answer_orig = record['answer_orig']\n",
" answer_llm = record['answer_llm']\n",
"\n",
" v_llm = model.encode(answer_llm)\n",
" v_orig = model.encode(answer_orig)\n",
"\n",
" return v_llm.dot(v_orig)"
],
"metadata": {
"id": "5MNIlpVQAupp"
},
"execution_count": 19,
"outputs": []
},
{
"cell_type": "code",
"source": [
"records = df.to_dict(orient='records')\n",
"records"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SzklfUMLBOSk",
"outputId": "1db5765c-d20c-489c-fb15-14c7b5e26139"
},
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[{'answer_llm': 'You can sign up for the course by visiting the course page at [http://mlzoomcamp.com/](http://mlzoomcamp.com/).',\n",
" 'answer_orig': 'Machine Learning Zoomcamp FAQ\\nThe purpose of this document is to capture frequently asked technical questions.\\nWe did this for our data engineering course and it worked quite well. Check this document for inspiration on how to structure your questions and answers:\\nData Engineering Zoomcamp FAQ\\nIn the course GitHub repository there’s a link. Here it is: https://airtable.com/shryxwLd0COOEaqXo\\nwork',\n",
" 'document': '0227b872',\n",
" 'question': 'Where can I sign up for the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can sign up using the link provided in the course GitHub repository: [https://airtable.com/shryxwLd0COOEaqXo](https://airtable.com/shryxwLd0COOEaqXo).',\n",
" 'answer_orig': 'Machine Learning Zoomcamp FAQ\\nThe purpose of this document is to capture frequently asked technical questions.\\nWe did this for our data engineering course and it worked quite well. Check this document for inspiration on how to structure your questions and answers:\\nData Engineering Zoomcamp FAQ\\nIn the course GitHub repository there’s a link. Here it is: https://airtable.com/shryxwLd0COOEaqXo\\nwork',\n",
" 'document': '0227b872',\n",
" 'question': 'Can you provide a link to sign up?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there is an FAQ for the Machine Learning course. It addresses frequently asked technical questions and is available in the course GitHub repository. You can find the link to the FAQ [here](https://airtable.com/shryxwLd0COOEaqXo).',\n",
" 'answer_orig': 'Machine Learning Zoomcamp FAQ\\nThe purpose of this document is to capture frequently asked technical questions.\\nWe did this for our data engineering course and it worked quite well. Check this document for inspiration on how to structure your questions and answers:\\nData Engineering Zoomcamp FAQ\\nIn the course GitHub repository there’s a link. Here it is: https://airtable.com/shryxwLd0COOEaqXo\\nwork',\n",
" 'document': '0227b872',\n",
" 'question': 'Is there an FAQ for this Machine Learning course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The context does not provide any specific information about a GitHub repository for the sign-up link for the course. Therefore, I cannot confirm whether or not such a repository exists.',\n",
" 'answer_orig': 'Machine Learning Zoomcamp FAQ\\nThe purpose of this document is to capture frequently asked technical questions.\\nWe did this for our data engineering course and it worked quite well. Check this document for inspiration on how to structure your questions and answers:\\nData Engineering Zoomcamp FAQ\\nIn the course GitHub repository there’s a link. Here it is: https://airtable.com/shryxwLd0COOEaqXo\\nwork',\n",
" 'document': '0227b872',\n",
" 'question': 'Does this course have a GitHub repository for the sign-up link?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To structure your questions and answers for the course effectively, consider the following steps:\\n\\n1. **Identify Your Topic**: Start by clearly defining the topic your question addresses. This helps in organizing related questions together.\\n\\n2. **Be Specific**: Formulate your question to be specific and concise. This makes it easier for others to understand what information you're seeking.\\n\\n3. **Provide Context**: If your question arises from a particular scenario or issue, briefly explain that context so that it can be better understood.\\n\\n4. **Refer to Course Materials**: When formulating answers, reference course materials, such as course pages or specific modules, to provide a clear and credible response.\\n\\n5. **Consider Audience**: Keep in mind who will be reading your questions and answers. Make sure the language and complexity match their understanding level.\\n\\n6. **Format for Clarity**: Use bullet points or numbered lists for answers to enhance clarity and readability.\\n\\n7. **Incorporate Feedback Loop**: Encourage readers to ask follow-up questions or provide feedback on the answers if clarity is lacking.\\n\\nBy structuring your questions and answers this way, you can ensure they are clear, relevant, and beneficial to others in the course.\",\n",
" 'answer_orig': 'Machine Learning Zoomcamp FAQ\\nThe purpose of this document is to capture frequently asked technical questions.\\nWe did this for our data engineering course and it worked quite well. Check this document for inspiration on how to structure your questions and answers:\\nData Engineering Zoomcamp FAQ\\nIn the course GitHub repository there’s a link. Here it is: https://airtable.com/shryxwLd0COOEaqXo\\nwork',\n",
" 'document': '0227b872',\n",
" 'question': 'How can I structure my questions and answers for the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The course videos are pre-recorded, and you can start watching them right now.',\n",
" 'answer_orig': 'The course videos are pre-recorded, you can start watching the course right now.\\nWe will also occasionally have office hours - live sessions where we will answer your questions. The office hours sessions are recorded too.\\nYou can see the office hours as well as the pre-recorded course videos in the course playlist on YouTube.',\n",
" 'document': '39fda9f0',\n",
" 'question': 'Are the course videos live or pre-recorded?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can start watching the course videos right now, as they are pre-recorded. If you haven’t taken part in the previous iteration, you are encouraged to begin watching them.',\n",
" 'answer_orig': 'The course videos are pre-recorded, you can start watching the course right now.\\nWe will also occasionally have office hours - live sessions where we will answer your questions. The office hours sessions are recorded too.\\nYou can see the office hours as well as the pre-recorded course videos in the course playlist on YouTube.',\n",
" 'document': '39fda9f0',\n",
" 'question': 'When can I start watching the course videos?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, the live office hours sessions are recorded.',\n",
" 'answer_orig': 'The course videos are pre-recorded, you can start watching the course right now.\\nWe will also occasionally have office hours - live sessions where we will answer your questions. The office hours sessions are recorded too.\\nYou can see the office hours as well as the pre-recorded course videos in the course playlist on YouTube.',\n",
" 'document': '39fda9f0',\n",
" 'question': 'Are the live office hours sessions recorded?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find the office hours sessions in the course playlist on YouTube, where both the live office hours and the pre-recorded course videos are available.',\n",
" 'answer_orig': 'The course videos are pre-recorded, you can start watching the course right now.\\nWe will also occasionally have office hours - live sessions where we will answer your questions. The office hours sessions are recorded too.\\nYou can see the office hours as well as the pre-recorded course videos in the course playlist on YouTube.',\n",
" 'document': '39fda9f0',\n",
" 'question': 'Where can I find the office hours sessions?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can access the pre-recorded course videos on the course page at http://mlzoomcamp.com/. You can also find them in the course playlist on YouTube by visiting the DTC YouTube channel and searching for the relevant course playlist.',\n",
" 'answer_orig': 'The course videos are pre-recorded, you can start watching the course right now.\\nWe will also occasionally have office hours - live sessions where we will answer your questions. The office hours sessions are recorded too.\\nYou can see the office hours as well as the pre-recorded course videos in the course playlist on YouTube.',\n",
" 'document': '39fda9f0',\n",
" 'question': 'Where can I access the pre-recorded course videos?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, all sessions are recorded, so if you miss one, you won't miss anything. You can catch up on the content later. Additionally, you can submit your questions in advance for office hours, and those sessions are also recorded.\",\n",
" 'answer_orig': 'Everything is recorded, so you won’t miss anything. You will be able to ask your questions for office hours in advance and we will cover them during the live stream. Also, you can always ask questions in Slack.',\n",
" 'document': '5170565b',\n",
" 'question': 'Are sessions recorded if I miss one?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, you can ask your questions in advance if you can't attend a live stream. You can submit your questions for office hours ahead of time, and they will be covered during the live session. Additionally, you can always ask questions in Slack.\",\n",
" 'answer_orig': 'Everything is recorded, so you won’t miss anything. You will be able to ask your questions for office hours in advance and we will cover them during the live stream. Also, you can always ask questions in Slack.',\n",
" 'document': '5170565b',\n",
" 'question': \"Can I ask questions in advance if I can't attend a live stream?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"If you miss a session, don't worry! Everything is recorded, so you won’t miss anything. You can also ask your questions for office hours in advance, and they will be covered during the live stream. Additionally, you can always ask questions in Slack.\",\n",
" 'answer_orig': 'Everything is recorded, so you won’t miss anything. You will be able to ask your questions for office hours in advance and we will cover them during the live stream. Also, you can always ask questions in Slack.',\n",
" 'document': '5170565b',\n",
" 'question': 'How will my questions be addressed if I miss a session?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there is a way to catch up on a missed session. Everything is recorded, so you won’t miss anything. You can also ask your questions for office hours in advance, and they will be covered during the live stream. Additionally, you can always ask questions in Slack.',\n",
" 'answer_orig': 'Everything is recorded, so you won’t miss anything. You will be able to ask your questions for office hours in advance and we will cover them during the live stream. Also, you can always ask questions in Slack.',\n",
" 'document': '5170565b',\n",
" 'question': 'Is there a way to catch up on a missed session?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can still interact with instructors after missing a session. Everything is recorded, so you won’t miss any content. You can ask your questions in advance for office hours, and those questions will be covered during the live stream. Additionally, you can always ask questions in Slack.',\n",
" 'answer_orig': 'Everything is recorded, so you won’t miss anything. You will be able to ask your questions for office hours in advance and we will cover them during the live stream. Also, you can always ask questions in Slack.',\n",
" 'document': '5170565b',\n",
" 'question': 'Can I still interact with instructors after missing a session?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The course will cover the bare minimum of theory, focusing more on practical application. The theory will be addressed on an intuitive level, meaning specific derivations, such as the gradient update rule for logistic regression, will not be covered in depth. Instead, the course will teach how to use logistic regression and interpret the results.',\n",
" 'answer_orig': \"The bare minimum. The focus is more on practice, and we'll cover the theory only on the intuitive level.: https://mlbookcamp.com/article/python\\nFor example, we won't derive the gradient update rule for logistic regression (there are other great courses for that), but we'll cover how to use logistic regression and make sense of the results.\",\n",
" 'document': 'ecca790c',\n",
" 'question': 'How much theoretical content is there in the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, the course will not cover theoretical derivations like the gradient update rule for logistic regression. The focus is more on practical applications, and the theory will only be covered at an intuitive level.',\n",
" 'answer_orig': \"The bare minimum. The focus is more on practice, and we'll cover the theory only on the intuitive level.: https://mlbookcamp.com/article/python\\nFor example, we won't derive the gradient update rule for logistic regression (there are other great courses for that), but we'll cover how to use logistic regression and make sense of the results.\",\n",
" 'document': 'ecca790c',\n",
" 'question': 'Will the course cover theoretical derivations like the gradient update rule for logistic regression?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The focus of the course is more on practice rather than theory. The course will cover only the bare minimum of theory, primarily on an intuitive level, while emphasizing practical applications and skills.',\n",
" 'answer_orig': \"The bare minimum. The focus is more on practice, and we'll cover the theory only on the intuitive level.: https://mlbookcamp.com/article/python\\nFor example, we won't derive the gradient update rule for logistic regression (there are other great courses for that), but we'll cover how to use logistic regression and make sense of the results.\",\n",
" 'document': 'ecca790c',\n",
" 'question': 'Is the focus of the course more on practice or theory?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To find more about the theoretical topics not covered in this course, consider looking for other great courses that delve deeper into those areas. The course itself focuses primarily on practical applications and covers theoretical concepts only at an intuitive level.',\n",
" 'answer_orig': \"The bare minimum. The focus is more on practice, and we'll cover the theory only on the intuitive level.: https://mlbookcamp.com/article/python\\nFor example, we won't derive the gradient update rule for logistic regression (there are other great courses for that), but we'll cover how to use logistic regression and make sense of the results.\",\n",
" 'document': 'ecca790c',\n",
" 'question': 'Where can I find more about the theoretical topics not covered in this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, the course will help you understand how to use logistic regression and interpret its results, although it will cover the theory only at an intuitive level. The course focuses more on practical applications rather than in-depth theoretical derivations.',\n",
" 'answer_orig': \"The bare minimum. The focus is more on practice, and we'll cover the theory only on the intuitive level.: https://mlbookcamp.com/article/python\\nFor example, we won't derive the gradient update rule for logistic regression (there are other great courses for that), but we'll cover how to use logistic regression and make sense of the results.\",\n",
" 'document': 'ecca790c',\n",
" 'question': 'Will the course help me understand how to use logistic regression and interpret its results?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No prior knowledge in math is necessary for this course. While some linear algebra will be covered, the course will primarily focus on code with very few formulas.',\n",
" 'answer_orig': \"Yes! We'll cover some linear algebra in the course, but in general, there will be very few formulas, mostly code.\\nHere are some interesting videos covering linear algebra that you can already watch: ML Zoomcamp 1.8 - Linear Algebra Refresher from Alexey Grigorev or the excellent playlist from 3Blue1Brown Vectors | Chapter 1, Essence of linear algebra. Never hesitate to ask the community for help if you have any question.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'c25b3de4',\n",
" 'question': 'Is prior knowledge in math necessary for this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The course will cover some linear algebra, but there will be very few formulas involved, focusing mostly on code instead.',\n",
" 'answer_orig': \"Yes! We'll cover some linear algebra in the course, but in general, there will be very few formulas, mostly code.\\nHere are some interesting videos covering linear algebra that you can already watch: ML Zoomcamp 1.8 - Linear Algebra Refresher from Alexey Grigorev or the excellent playlist from 3Blue1Brown Vectors | Chapter 1, Essence of linear algebra. Never hesitate to ask the community for help if you have any question.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'c25b3de4',\n",
" 'question': 'What type of mathematical content will be covered?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there are resources available to help with linear algebra. You can watch the \"ML Zoomcamp 1.8 - Linear Algebra Refresher\" from Alexey Grigorev and the \"Essence of Linear Algebra\" playlist from 3Blue1Brown. Additionally, you can ask the community for help if you have any questions.',\n",
" 'answer_orig': \"Yes! We'll cover some linear algebra in the course, but in general, there will be very few formulas, mostly code.\\nHere are some interesting videos covering linear algebra that you can already watch: ML Zoomcamp 1.8 - Linear Algebra Refresher from Alexey Grigorev or the excellent playlist from 3Blue1Brown Vectors | Chapter 1, Essence of linear algebra. Never hesitate to ask the community for help if you have any question.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'c25b3de4',\n",
" 'question': 'Are there resources available to help with linear algebra?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can seek help from the community for math-related questions.',\n",
" 'answer_orig': \"Yes! We'll cover some linear algebra in the course, but in general, there will be very few formulas, mostly code.\\nHere are some interesting videos covering linear algebra that you can already watch: ML Zoomcamp 1.8 - Linear Algebra Refresher from Alexey Grigorev or the excellent playlist from 3Blue1Brown Vectors | Chapter 1, Essence of linear algebra. Never hesitate to ask the community for help if you have any question.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'c25b3de4',\n",
" 'question': 'Can I seek help from the community for math-related questions?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there are recommended videos for learning linear algebra. You can watch \"ML Zoomcamp 1.8 - Linear Algebra Refresher\" by Alexey Grigorev, or you can check out the excellent playlist from 3Blue1Brown titled \"Vectors | Chapter 1, Essence of Linear Algebra.\"',\n",
" 'answer_orig': \"Yes! We'll cover some linear algebra in the course, but in general, there will be very few formulas, mostly code.\\nHere are some interesting videos covering linear algebra that you can already watch: ML Zoomcamp 1.8 - Linear Algebra Refresher from Alexey Grigorev or the excellent playlist from 3Blue1Brown Vectors | Chapter 1, Essence of linear algebra. Never hesitate to ask the community for help if you have any question.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'c25b3de4',\n",
" 'question': 'Are there any recommended videos for learning linear algebra?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"If you haven't received a confirmation email after filling out the form, it is normal to experience a delay as the process is automated. You should eventually receive the email. In the meantime, check your promotions tab in Gmail as well as your spam folder. If you unsubscribed from the newsletter, you won't receive course-related updates. To ensure you don’t miss any important information, consider joining the #course-ml-zoomcamp channel in Slack and the telegram channel with announcements.\",\n",
" 'answer_orig': \"The process is automated now, so you should receive the email eventually. If you haven’t, check your promotions tab in Gmail as well as spam.\\nIf you unsubscribed from our newsletter, you won't get course related updates too.\\nBut don't worry, it’s not a problem. To make sure you don’t miss anything, join the #course-ml-zoomcamp channel in Slack and our telegram channel with announcements. This is enough to follow the course.\",\n",
" 'document': '6ba259b1',\n",
" 'question': \"What should I do if I haven't received a confirmation email after filling out the form?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, it is possible that your confirmation email went to your spam folder. If you haven't received it, it's recommended to check your promotions tab in Gmail as well as your spam folder.\",\n",
" 'answer_orig': \"The process is automated now, so you should receive the email eventually. If you haven’t, check your promotions tab in Gmail as well as spam.\\nIf you unsubscribed from our newsletter, you won't get course related updates too.\\nBut don't worry, it’s not a problem. To make sure you don’t miss anything, join the #course-ml-zoomcamp channel in Slack and our telegram channel with announcements. This is enough to follow the course.\",\n",
" 'document': '6ba259b1',\n",
" 'question': 'Is it possible my confirmation email went to my spam folder?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, unsubscribing from the newsletter will affect your receipt of course-related updates. If you unsubscribe, you won't receive those updates. However, you can join the #course-ml-zoomcamp channel in Slack and the telegram channel with announcements to stay informed about the course.\",\n",
" 'answer_orig': \"The process is automated now, so you should receive the email eventually. If you haven’t, check your promotions tab in Gmail as well as spam.\\nIf you unsubscribed from our newsletter, you won't get course related updates too.\\nBut don't worry, it’s not a problem. To make sure you don’t miss anything, join the #course-ml-zoomcamp channel in Slack and our telegram channel with announcements. This is enough to follow the course.\",\n",
" 'document': '6ba259b1',\n",
" 'question': 'Can unsubscribing from the newsletter affect receiving course updates?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"If you don't receive the confirmation email, check your promotions tab in Gmail as well as your spam folder. If you unsubscribed from the newsletter, you won't receive course-related updates. To ensure you don’t miss any updates, join the #course-ml-zoomcamp channel in Slack and the telegram channel with announcements. This will keep you informed about the course.\",\n",
" 'answer_orig': \"The process is automated now, so you should receive the email eventually. If you haven’t, check your promotions tab in Gmail as well as spam.\\nIf you unsubscribed from our newsletter, you won't get course related updates too.\\nBut don't worry, it’s not a problem. To make sure you don’t miss anything, join the #course-ml-zoomcamp channel in Slack and our telegram channel with announcements. This is enough to follow the course.\",\n",
" 'document': '6ba259b1',\n",
" 'question': 'Where can I get course updates if I don’t receive the confirmation email?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"If you're having email issues, you can stay informed about the course by joining the #course-ml-zoomcamp channel in Slack and the Telegram channel with announcements. This will ensure you don’t miss any important updates regarding the course.\",\n",
" 'answer_orig': \"The process is automated now, so you should receive the email eventually. If you haven’t, check your promotions tab in Gmail as well as spam.\\nIf you unsubscribed from our newsletter, you won't get course related updates too.\\nBut don't worry, it’s not a problem. To make sure you don’t miss anything, join the #course-ml-zoomcamp channel in Slack and our telegram channel with announcements. This is enough to follow the course.\",\n",
" 'document': '6ba259b1',\n",
" 'question': \"How can I stay informed about the course if I'm having email issues?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The duration of the course is approximately 4 months, but it may take longer if you choose to engage in extra activities such as an additional project or article.',\n",
" 'answer_orig': 'Approximately 4 months, but may take more if you want to do some extra activities (an extra project, an article, etc)',\n",
" 'document': '67e2fd13',\n",
" 'question': 'What is the duration of the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The course takes approximately 4 months, but it may take more time if you choose to participate in extra activities such as an extra project or an article.',\n",
" 'answer_orig': 'Approximately 4 months, but may take more if you want to do some extra activities (an extra project, an article, etc)',\n",
" 'document': '67e2fd13',\n",
" 'question': 'How many months does the course take?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, the course can take more than 4 months if you choose to engage in extra activities such as an additional project or article.',\n",
" 'answer_orig': 'Approximately 4 months, but may take more if you want to do some extra activities (an extra project, an article, etc)',\n",
" 'document': '67e2fd13',\n",
" 'question': 'Can the course take more than 4 months?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Based on the context, the course duration is approximately 4 months, but it may take longer if you choose to engage in extra activities, such as additional projects or articles. There is no specific mention of a formal process for extending the course duration itself, but the self-paced mode allows you to go through the materials at your own speed.',\n",
" 'answer_orig': 'Approximately 4 months, but may take more if you want to do some extra activities (an extra project, an article, etc)',\n",
" 'document': '67e2fd13',\n",
" 'question': 'Is it possible to extend the course duration?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Extra activities that might extend the course time include undertaking an extra project or writing an article.',\n",
" 'answer_orig': 'Approximately 4 months, but may take more if you want to do some extra activities (an extra project, an article, etc)',\n",
" 'document': '67e2fd13',\n",
" 'question': 'What extra activities might extend the course time?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You should allocate around ~10 hours per week for this course.',\n",
" 'answer_orig': 'Around ~10 hours per week. Timur Kamaliev did a detailed analysis of how much time students of the previous cohort needed to spend on different modules and projects. Full article',\n",
" 'document': 'a6897e8c',\n",
" 'question': 'How many hours per week should I allocate for this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The time needed for different modules was analyzed by Timur Kamaliev.',\n",
" 'answer_orig': 'Around ~10 hours per week. Timur Kamaliev did a detailed analysis of how much time students of the previous cohort needed to spend on different modules and projects. Full article',\n",
" 'document': 'a6897e8c',\n",
" 'question': 'Who analyzed the time needed for different modules?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Students of the previous cohort spent around ~10 hours per week on the course.',\n",
" 'answer_orig': 'Around ~10 hours per week. Timur Kamaliev did a detailed analysis of how much time students of the previous cohort needed to spend on different modules and projects. Full article',\n",
" 'document': 'a6897e8c',\n",
" 'question': 'How much time did students of the previous cohort spend weekly?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The estimated weekly time commitment for the course is around 10 hours per week.',\n",
" 'answer_orig': 'Around ~10 hours per week. Timur Kamaliev did a detailed analysis of how much time students of the previous cohort needed to spend on different modules and projects. Full article',\n",
" 'document': 'a6897e8c',\n",
" 'question': 'Can you give an estimated weekly time commitment for the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find the detailed analysis of study time requirements conducted by Timur Kamaliev in a full article that outlines how much time students of the previous cohort needed to spend on different modules and projects.',\n",
" 'answer_orig': 'Around ~10 hours per week. Timur Kamaliev did a detailed analysis of how much time students of the previous cohort needed to spend on different modules and projects. Full article',\n",
" 'document': 'a6897e8c',\n",
" 'question': 'Where can I find the detailed analysis of study time requirements?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To earn a certificate in this course, you need to submit at least 2 out of 3 course projects and review 3 peers’ projects by the deadline. If you successfully meet these requirements, you will receive a certificate.',\n",
" 'answer_orig': 'Yes, if you finish at least 2 out of 3 projects and review 3 peers’ Projects by the deadline, you will get a certificate. This is what it looks like: link. There’s also a version without a robot: link.',\n",
" 'document': '2eba08e3',\n",
" 'question': 'How can I earn a certificate in this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To receive a certificate, you need to submit at least 2 out of the 3 course projects and peer-review at least 3 projects from your course-mates for each submission by the deadline.',\n",
" 'answer_orig': 'Yes, if you finish at least 2 out of 3 projects and review 3 peers’ Projects by the deadline, you will get a certificate. This is what it looks like: link. There’s also a version without a robot: link.',\n",
" 'document': '2eba08e3',\n",
" 'question': 'What are the requirements to receive a certificate?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, you do not need to complete all projects to get a certificate. You only need to submit at least 2 out of 3 projects and review 3 peers’ projects by the deadline.',\n",
" 'answer_orig': 'Yes, if you finish at least 2 out of 3 projects and review 3 peers’ Projects by the deadline, you will get a certificate. This is what it looks like: link. There’s also a version without a robot: link.',\n",
" 'document': '2eba08e3',\n",
" 'question': 'Do I need to complete all projects to get a certificate?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The deadline to qualify for the certificate is based on when you need to submit 2 out of 3 course projects and review 3 peers’ projects. Specific deadline dates can be found on the 2023 cohort page.',\n",
" 'answer_orig': 'Yes, if you finish at least 2 out of 3 projects and review 3 peers’ Projects by the deadline, you will get a certificate. This is what it looks like: link. There’s also a version without a robot: link.',\n",
" 'document': '2eba08e3',\n",
" 'question': 'What is the deadline to qualify for the certificate?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there is an option for a certificate without a robot image.',\n",
" 'answer_orig': 'Yes, if you finish at least 2 out of 3 projects and review 3 peers’ Projects by the deadline, you will get a certificate. This is what it looks like: link. There’s also a version without a robot: link.',\n",
" 'document': '2eba08e3',\n",
" 'question': 'Is there an option for a certificate without a robot image?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, you could still receive a certificate even if you don't complete the midterm project, as long as you finish at least 2 out of 3 projects and review 3 peers' projects by the deadline.\",\n",
" 'answer_orig': \"Yes, it's possible. See the previous answer.\",\n",
" 'document': '1d644223',\n",
" 'question': \"Will I receive a certificate if I don't complete the midterm project?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, it is possible to graduate without submitting the midterm project, as you can still receive a certificate by completing at least 2 out of 3 projects and reviewing 3 peers’ projects by the deadline.',\n",
" 'answer_orig': \"Yes, it's possible. See the previous answer.\",\n",
" 'document': '1d644223',\n",
" 'question': 'Can I still graduate without submitting the midterm project?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you may still be eligible for certification if you missed the midterm project. As long as you complete at least 2 out of the 3 projects and review 3 peers’ projects by the deadline, you will receive a certificate.',\n",
" 'answer_orig': \"Yes, it's possible. See the previous answer.\",\n",
" 'document': '1d644223',\n",
" 'question': 'Am I eligible for certification if I missed the midterm project?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, it is not mandatory to complete the midterm project for the certificate. You can receive a certificate by submitting at least 2 out of the 3 course projects and reviewing 3 peers’ projects by the deadline.',\n",
" 'answer_orig': \"Yes, it's possible. See the previous answer.\",\n",
" 'document': '1d644223',\n",
" 'question': 'Is it mandatory to complete the midterm project for the certificate?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, missing the midterm project does not affect your certification eligibility as long as you submit at least two out of three projects and complete the peer reviews required. According to the course coordinator, only two projects are needed to obtain the course certificate.',\n",
" 'answer_orig': \"Yes, it's possible. See the previous answer.\",\n",
" 'document': '1d644223',\n",
" 'question': 'Does missing the midterm project affect my certification eligibility?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To participate in this course, you should have a basic understanding of Python, specifically the following skills:\\n\\n1. How to run a Jupyter notebook.\\n2. How to import libraries and an understanding of what libraries are.\\n3. How to declare a variable and what variables are.\\n4. Important operations related to data analysis.\\n\\nIf you are familiar with these topics, you know enough Python for the course. If not, it is recommended that you review these basics before enrolling. You can also take the free course \"Learn Python Basics for Data Analysis\" on the OpenClassrooms e-learning platform to help you prepare.',\n",
" 'answer_orig': 'Check this article. If you know everything in this article, you know enough. If you don’t, read the article and join the coursIntroduction to Pythone too :)\\nIntroduction to Python – Machine Learning Bookcamp\\nYou can follow this English course from the OpenClassrooms e-learning platform, which is free and covers the python basics for data analysis: Learn Python Basics for Data Analysis - OpenClassrooms . It is important to know some basics such as: how to run a Jupyter notebook, how to import libraries (and what libraries are), how to declare a variable (and what variables are) and some important operations regarding data analysis.\\n(Mélanie Fouesnard)',\n",
" 'document': '14890cd2',\n",
" 'question': 'What Python basics are required for this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can learn the necessary Python knowledge for this course by reading an article that outlines the required Python skills. If you know everything in that article, you are prepared for the course. If not, it\\'s recommended that you read the article and also consider joining the \"Introduction to Python\" course. Additionally, you can follow a free English course on the OpenClassrooms e-learning platform titled \"Learn Python Basics for Data Analysis,\" which covers the Python basics essential for data analysis, such as running a Jupyter notebook, importing libraries, declaring variables, and performing important data analysis operations.',\n",
" 'answer_orig': 'Check this article. If you know everything in this article, you know enough. If you don’t, read the article and join the coursIntroduction to Pythone too :)\\nIntroduction to Python – Machine Learning Bookcamp\\nYou can follow this English course from the OpenClassrooms e-learning platform, which is free and covers the python basics for data analysis: Learn Python Basics for Data Analysis - OpenClassrooms . It is important to know some basics such as: how to run a Jupyter notebook, how to import libraries (and what libraries are), how to declare a variable (and what variables are) and some important operations regarding data analysis.\\n(Mélanie Fouesnard)',\n",
" 'document': '14890cd2',\n",
" 'question': 'Where can I learn the necessary Python knowledge for this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'It is important to know how to run a Jupyter notebook for this course because notebooks provide a user-friendly interface for developing and testing machine learning models. They allow for interactive coding, visualization of data, and step-by-step execution of code, which can simplify the training process compared to running scripts. Additionally, understanding how to navigate and execute commands in Jupyter notebooks is essential when working on projects that may require peer review, as seen with the need for a train.py file that complements the notebook for seamless collaboration.',\n",
" 'answer_orig': 'Check this article. If you know everything in this article, you know enough. If you don’t, read the article and join the coursIntroduction to Pythone too :)\\nIntroduction to Python – Machine Learning Bookcamp\\nYou can follow this English course from the OpenClassrooms e-learning platform, which is free and covers the python basics for data analysis: Learn Python Basics for Data Analysis - OpenClassrooms . It is important to know some basics such as: how to run a Jupyter notebook, how to import libraries (and what libraries are), how to declare a variable (and what variables are) and some important operations regarding data analysis.\\n(Mélanie Fouesnard)',\n",
" 'document': '14890cd2',\n",
" 'question': 'Why is it important to know how to run a Jupyter notebook for this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'For this course, you should know some basic operations regarding data analysis, including how to run a Jupyter notebook, how to import libraries and understand what libraries are, how to declare a variable and understand what variables are, and engage in important operations related to data analysis.',\n",
" 'answer_orig': 'Check this article. If you know everything in this article, you know enough. If you don’t, read the article and join the coursIntroduction to Pythone too :)\\nIntroduction to Python – Machine Learning Bookcamp\\nYou can follow this English course from the OpenClassrooms e-learning platform, which is free and covers the python basics for data analysis: Learn Python Basics for Data Analysis - OpenClassrooms . It is important to know some basics such as: how to run a Jupyter notebook, how to import libraries (and what libraries are), how to declare a variable (and what variables are) and some important operations regarding data analysis.\\n(Mélanie Fouesnard)',\n",
" 'document': '14890cd2',\n",
" 'question': 'What basic operations regarding data analysis should I know for this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To learn how to declare variables in Python for this course, it\\'s recommended that you read the article mentioned in the FAQs. This article will provide the necessary information about basic Python knowledge, including how to declare a variable and what variables are. Additionally, you can follow the free English course, \"Learn Python Basics for Data Analysis,\" offered by OpenClassrooms, which covers Python basics that are essential for data analysis, including variable declaration.',\n",
" 'answer_orig': 'Check this article. If you know everything in this article, you know enough. If you don’t, read the article and join the coursIntroduction to Pythone too :)\\nIntroduction to Python – Machine Learning Bookcamp\\nYou can follow this English course from the OpenClassrooms e-learning platform, which is free and covers the python basics for data analysis: Learn Python Basics for Data Analysis - OpenClassrooms . It is important to know some basics such as: how to run a Jupyter notebook, how to import libraries (and what libraries are), how to declare a variable (and what variables are) and some important operations regarding data analysis.\\n(Mélanie Fouesnard)',\n",
" 'document': '14890cd2',\n",
" 'question': 'How can I learn to declare variables in Python for this course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'For the course, all you need is a working laptop with an internet connection for the Machine Learning part. The Deep Learning part is more resource-intensive, but you can use a cloud service (Saturn Cloud is recommended, but others can be used as well).',\n",
" 'answer_orig': 'For the Machine Learning part, all you need is a working laptop with an internet connection. The Deep Learning part is more resource intensive, but for that you can use a cloud (we use Saturn cloud but can be anything else).\\n(Rileen Sinha; based on response by Alexey on Slack)',\n",
" 'document': 'a4fad482',\n",
" 'question': 'Are there any specific hardware requirements for the course, or is everything mainly based on cloud services?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, for the Machine Learning section, you just need a working laptop with an internet connection.',\n",
" 'answer_orig': 'For the Machine Learning part, all you need is a working laptop with an internet connection. The Deep Learning part is more resource intensive, but for that you can use a cloud (we use Saturn cloud but can be anything else).\\n(Rileen Sinha; based on response by Alexey on Slack)',\n",
" 'document': 'a4fad482',\n",
" 'question': 'Do I just need a laptop and internet for the Machine Learning section?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, the Deep Learning part of the course is more resource-intensive than the Machine Learning part. While a working laptop with an internet connection is sufficient for the Machine Learning part, the Deep Learning part requires more resources, for which you can use cloud services (the course uses Saturn cloud but other options are available).',\n",
" 'answer_orig': 'For the Machine Learning part, all you need is a working laptop with an internet connection. The Deep Learning part is more resource intensive, but for that you can use a cloud (we use Saturn cloud but can be anything else).\\n(Rileen Sinha; based on response by Alexey on Slack)',\n",
" 'document': 'a4fad482',\n",
" 'question': 'Is the Deep Learning part of the course more resource-intensive than the Machine Learning part?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, Saturn Cloud can be used for the Deep Learning part of the course. While the Deep Learning section is more resource-intensive, you can utilize Saturn Cloud or any other cloud service for these tasks.',\n",
" 'answer_orig': 'For the Machine Learning part, all you need is a working laptop with an internet connection. The Deep Learning part is more resource intensive, but for that you can use a cloud (we use Saturn cloud but can be anything else).\\n(Rileen Sinha; based on response by Alexey on Slack)',\n",
" 'document': 'a4fad482',\n",
" 'question': 'Can Saturn Cloud be used for the Deep Learning part of the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, it is possible to use a cloud service other than Saturn Cloud for the Deep Learning section. While Saturn Cloud is used in the course, any other cloud service can be utilized as well.',\n",
" 'answer_orig': 'For the Machine Learning part, all you need is a working laptop with an internet connection. The Deep Learning part is more resource intensive, but for that you can use a cloud (we use Saturn cloud but can be anything else).\\n(Rileen Sinha; based on response by Alexey on Slack)',\n",
" 'document': 'a4fad482',\n",
" 'question': 'Is it possible to use a cloud service other than Saturn Cloud for the Deep Learning section?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find instructions to install TensorFlow GPU on Ubuntu by referring to this article: [https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/](https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/). Additionally, make sure to follow steps to install CUDA and cuDNN as they are necessary for GPU support.',\n",
" 'answer_orig': 'Here is an article that worked for me: https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/',\n",
" 'document': '34b7fd35',\n",
" 'question': 'Where can I find instructions to install TensorFlow GPU on Ubuntu?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there is a guide to set up TensorFlow with GPU support on Ubuntu. You can find an article that worked for someone here: [Setting up TensorFlow with GPU on Ubuntu](https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/). Additionally, you will need to install CUDA and cuDNN, and the context provides a simplified guide for that setup. Make sure to follow the installation instructions carefully to ensure proper functionality.',\n",
" 'answer_orig': 'Here is an article that worked for me: https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/',\n",
" 'document': '34b7fd35',\n",
" 'question': 'Is there a guide to set up TensorFlow with GPU on Ubuntu?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To configure GPU support for TensorFlow on your Ubuntu machine, you'll need to install CUDA and cuDNN. A simplified guide for installation can be helpful. Additionally, you may find an article that outlines the process useful: https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/. Make sure to follow the steps for installing the appropriate NVIDIA drivers and setting up the TensorFlow environment tailored for GPU usage.\",\n",
" 'answer_orig': 'Here is an article that worked for me: https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/',\n",
" 'document': '34b7fd35',\n",
" 'question': 'How do I configure GPU support for TensorFlow on my Ubuntu machine?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can refer to the article linked here for guidance on setting up TensorFlow with GPU support on Ubuntu: [Setup TensorFlow with GPU support](https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/). This resource includes helpful steps for the installation process.',\n",
" 'answer_orig': 'Here is an article that worked for me: https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/',\n",
" 'document': '34b7fd35',\n",
" 'question': 'Can you direct me to a resource for installing TensorFlow with GPU on Ubuntu?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To set up TensorFlow with GPU support on Ubuntu, follow these general steps:\\n\\n1. **Install Nvidia Drivers**: You can download the necessary Nvidia drivers from the official Nvidia website: [Nvidia Drivers](https://www.nvidia.com/download/index.aspx).\\n\\n2. **Install CUDA Toolkit**: Download and install the CUDA toolkit (version 11.x.x) from the CUDA Toolkit Archive: [CUDA Toolkit Archive](https://developer.nvidia.com/cuda-toolkit-archive).\\n\\n3. **Install cuDNN**: Download and install cuDNN from the following link: [cuDNN Download](https://developer.nvidia.com/rdp/cudnn-download).\\n\\n4. **Install TensorFlow**: You have two options:\\n - Install TensorFlow with GPU support through Anaconda by running: `conda install tensorflow-gpu`.\\n - Alternatively, install TensorFlow using pip by following the instructions at the TensorFlow installation guide for Linux: [TensorFlow Installation](https://www.tensorflow.org/install/pip#linux_1).\\n\\n5. **Set Environment Variables**: Make sure to follow any specific instructions for setting environment variables as necessary.\\n\\nFor a detailed guide tailored to your needs, you can refer to this article: [Setting up TensorFlow GPU on Ubuntu](https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/).',\n",
" 'answer_orig': 'Here is an article that worked for me: https://knowmledge.com/2023/12/07/ml-zoomcamp-2023-project/',\n",
" 'document': '34b7fd35',\n",
" 'question': 'What is the procedure to set up TensorFlow GPU on Ubuntu?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To join a channel in Slack, follow these steps:\\n\\n1. Click “All channels” at the top of your left sidebar. If you don't see this option, click “More” to find it.\\n2. Browse the list of public channels in your workspace or use the search bar to search by channel name or description.\\n3. Select a channel from the list to view it.\\n4. Click Join Channel.\\n\\nFor further details, you can visit the Slack help page: [Join a channel](https://slack.com/help/articles/205239967-Join-a-channel).\",\n",
" 'answer_orig': \"Here’s how you join a in Slack: https://slack.com/help/articles/205239967-Join-a-channel\\nClick “All channels” at the top of your left sidebar. If you don't see this option, click “More” to find it.\\nBrowse the list of public channels in your workspace, or use the search bar to search by channel name or description.\\nSelect a channel from the list to view it.\\nClick Join Channel.\\nDo we need to provide the GitHub link to only our code corresponding to the homework questions?\\nYes. You are required to provide the URL to your repo in order to receive a grade\",\n",
" 'document': '4930aa19',\n",
" 'question': 'How can I join a channel in Slack?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To find course channels on Slack, follow these steps:\\n\\n1. Go to the Slack workspace.\\n2. Click “All channels” at the top of your left sidebar. If you don’t see this option, click “More” to find it.\\n3. Browse the list of public channels or use the search bar to search by channel name or description.\\n4. Select the channel you want from the list to view it.\\n5. Click “Join Channel” to become a member. \\n\\nFor more detailed guidance, you can visit this link: https://slack.com/help/articles/205239967-Join-a-channel.',\n",
" 'answer_orig': \"Here’s how you join a in Slack: https://slack.com/help/articles/205239967-Join-a-channel\\nClick “All channels” at the top of your left sidebar. If you don't see this option, click “More” to find it.\\nBrowse the list of public channels in your workspace, or use the search bar to search by channel name or description.\\nSelect a channel from the list to view it.\\nClick Join Channel.\\nDo we need to provide the GitHub link to only our code corresponding to the homework questions?\\nYes. You are required to provide the URL to your repo in order to receive a grade\",\n",
" 'document': '4930aa19',\n",
" 'question': 'What steps should I follow to find course channels on Slack?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you need to provide the URL to your GitHub repository. The repository must be public so that whoever corrects the homework can access it.',\n",
" 'answer_orig': \"Here’s how you join a in Slack: https://slack.com/help/articles/205239967-Join-a-channel\\nClick “All channels” at the top of your left sidebar. If you don't see this option, click “More” to find it.\\nBrowse the list of public channels in your workspace, or use the search bar to search by channel name or description.\\nSelect a channel from the list to view it.\\nClick Join Channel.\\nDo we need to provide the GitHub link to only our code corresponding to the homework questions?\\nYes. You are required to provide the URL to your repo in order to receive a grade\",\n",
" 'document': '4930aa19',\n",
" 'question': 'Is there a need to give the URL to our GitHub repo?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You need to provide the GitHub URL for your homework by including the URL to your repository. This URL is required in order to receive a grade for your homework.',\n",
" 'answer_orig': \"Here’s how you join a in Slack: https://slack.com/help/articles/205239967-Join-a-channel\\nClick “All channels” at the top of your left sidebar. If you don't see this option, click “More” to find it.\\nBrowse the list of public channels in your workspace, or use the search bar to search by channel name or description.\\nSelect a channel from the list to view it.\\nClick Join Channel.\\nDo we need to provide the GitHub link to only our code corresponding to the homework questions?\\nYes. You are required to provide the URL to your repo in order to receive a grade\",\n",
" 'document': '4930aa19',\n",
" 'question': 'Where do I provide the GitHub URL for our homework?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To join a Slack channel, follow these steps:\\n\\n1. Click \"All channels\" at the top of your left sidebar. If you don\\'t see this option, click “More” to find it.\\n2. Browse the list of public channels in your workspace, or use the search bar to search by channel name or description.\\n3. Select a channel from the list to view it.\\n4. Click \"Join Channel.\"\\n\\nFor more detailed instructions, you can visit this link: https://slack.com/help/articles/205239967-Join-a-channel.',\n",
" 'answer_orig': \"Here’s how you join a in Slack: https://slack.com/help/articles/205239967-Join-a-channel\\nClick “All channels” at the top of your left sidebar. If you don't see this option, click “More” to find it.\\nBrowse the list of public channels in your workspace, or use the search bar to search by channel name or description.\\nSelect a channel from the list to view it.\\nClick Join Channel.\\nDo we need to provide the GitHub link to only our code corresponding to the homework questions?\\nYes. You are required to provide the URL to your repo in order to receive a grade\",\n",
" 'document': '4930aa19',\n",
" 'question': 'Can you show me how to join a Slack channel?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, you can still participate in the course if you join late. However, you won’t be able to submit some of the homeworks. To receive a certificate, you need to submit 2 out of 3 course projects and review 3 peers' projects by the deadline.\",\n",
" 'answer_orig': 'Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.\\nIn order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.',\n",
" 'document': 'ee58a693',\n",
" 'question': 'If I join the course late, can I still participate?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can still obtain a certificate if you join the course after it has started. As long as you submit 2 out of 3 course projects and review 3 peers’ projects by the deadline, you will be eligible for a certificate.',\n",
" 'answer_orig': 'Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.\\nIn order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.',\n",
" 'document': 'ee58a693',\n",
" 'question': 'Will I be able to obtain a certificate if I join the course after it has started?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To be eligible for a certificate, you must submit 2 out of 3 course projects and review 3 peers’ projects by the deadline.',\n",
" 'answer_orig': 'Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.\\nIn order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.',\n",
" 'document': 'ee58a693',\n",
" 'question': 'How many course projects must I submit to be eligible for a certificate?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To receive a certificate, you are required to finish at least 2 out of 3 projects and review 3 peers' projects by the deadline.\",\n",
" 'answer_orig': 'Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.\\nIn order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.',\n",
" 'document': 'ee58a693',\n",
" 'question': 'What is the requirement for peer reviews to get a certificate?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can join the course after it has started. However, you won’t be able to submit some of the homeworks. Homework is not mandatory for finishing the course, but to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline.',\n",
" 'answer_orig': 'Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.\\nIn order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.',\n",
" 'document': 'ee58a693',\n",
" 'question': 'Can I submit homework if I join the course after it has started?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, the course is available in a self-paced mode, allowing you to go through the materials at any time.',\n",
" 'answer_orig': 'The course is available in the self-paced mode too, so you can go through the materials at any time. But if you want to do it as a cohort with other students, the next iterations will happen in September 2023, September 2024 (and potentially other Septembers as well).',\n",
" 'document': '636f55d5',\n",
" 'question': 'Is the course available in a self-paced mode?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can start the course anytime. The course is available in self-paced mode, allowing you to go through the materials at any time. If you prefer to do it with other students, the next cohort iterations will begin in September 2023 and September 2024.',\n",
" 'answer_orig': 'The course is available in the self-paced mode too, so you can go through the materials at any time. But if you want to do it as a cohort with other students, the next iterations will happen in September 2023, September 2024 (and potentially other Septembers as well).',\n",
" 'document': '636f55d5',\n",
" 'question': 'Can I start the course anytime?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The next cohort will start in September 2023. Subsequent iterations are also expected to occur in September 2024 and potentially other Septembers as well.',\n",
" 'answer_orig': 'The course is available in the self-paced mode too, so you can go through the materials at any time. But if you want to do it as a cohort with other students, the next iterations will happen in September 2023, September 2024 (and potentially other Septembers as well).',\n",
" 'document': '636f55d5',\n",
" 'question': 'When is the next cohort starting?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The course materials are available in self-paced mode, so you can go through them at any time. There are no set times for the course materials if you choose this option. If you prefer to progress with a cohort, the next iterations will occur in September 2023 and September 2024.',\n",
" 'answer_orig': 'The course is available in the self-paced mode too, so you can go through the materials at any time. But if you want to do it as a cohort with other students, the next iterations will happen in September 2023, September 2024 (and potentially other Septembers as well).',\n",
" 'document': '636f55d5',\n",
" 'question': 'Are there set times for the course materials?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there will be iterations in future Septembers, specifically in September 2024 and potentially in other Septembers as well.',\n",
" 'answer_orig': 'The course is available in the self-paced mode too, so you can go through the materials at any time. But if you want to do it as a cohort with other students, the next iterations will happen in September 2023, September 2024 (and potentially other Septembers as well).',\n",
" 'document': '636f55d5',\n",
" 'question': 'Will there be iterations in future Septembers?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, you cannot submit your homework after the due date. The form closes after the due date, and late submissions will not be evaluated. However, homework is not mandatory for finishing the course.',\n",
" 'answer_orig': 'No, it’s not possible. The form is closed after the due date. But don’t worry, homework is not mandatory for finishing the course.',\n",
" 'document': 'c839b764',\n",
" 'question': 'Can I send in my homework late?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, it is not feasible to submit homework after the deadline. The form is closed after the due date, and homework is not mandatory for finishing the course.',\n",
" 'answer_orig': 'No, it’s not possible. The form is closed after the due date. But don’t worry, homework is not mandatory for finishing the course.',\n",
" 'document': 'c839b764',\n",
" 'question': 'Is it feasible to submit homework post deadline?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"If you miss the homework submission deadline, you will not be able to submit the homework as the form is closed after the due date. However, it's important to note that homework is not mandatory for finishing the course.\",\n",
" 'answer_orig': 'No, it’s not possible. The form is closed after the due date. But don’t worry, homework is not mandatory for finishing the course.',\n",
" 'document': 'c839b764',\n",
" 'question': 'What happens if I miss the homework submission deadline?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, it’s not possible to submit homework after the due date. The form is closed after the due date. However, homework is not mandatory for finishing the course.',\n",
" 'answer_orig': 'No, it’s not possible. The form is closed after the due date. But don’t worry, homework is not mandatory for finishing the course.',\n",
" 'document': 'c839b764',\n",
" 'question': 'After the due date, can homework still be submitted?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"No, late homework submissions are not accepted. Once the due date has passed, the submission form is closed, and you cannot submit your homework. However, it's worth noting that homework is not mandatory for completing the course.\",\n",
" 'answer_orig': 'No, it’s not possible. The form is closed after the due date. But don’t worry, homework is not mandatory for finishing the course.',\n",
" 'document': 'c839b764',\n",
" 'question': 'Are late homework submissions accepted?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The initial step after joining the course is to go to the course page (http://mlzoomcamp.com/), scroll down, and start going through the course materials. Then, read everything in the cohort folder for your cohort’s year, and begin watching the videos and office hours from previous cohorts.',\n",
" 'answer_orig': 'Welcome to the course! Go to the course page (http://mlzoomcamp.com/), scroll down and start going through the course materials. Then read everything in the cohort folder for your cohort’s year.\\nClick on the links and start watching the videos. Also watch office hours from previous cohorts. Go to DTC youtube channel and click on Playlists and search for {course yyyy}. ML Zoomcamp was first launched in 2021.\\nOr you can just use this link: http://mlzoomcamp.com/#syllabus',\n",
" 'document': '0a278fb2',\n",
" 'question': 'What is the initial step after joining the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To view the content of the course, go to the course page at [http://mlzoomcamp.com/](http://mlzoomcamp.com/), scroll down, and start going through the course materials. Be sure to read everything in the cohort folder for your cohort’s year, click on the links, and start watching the videos. You can also access office hours from previous cohorts and check the DTC YouTube channel for playlists related to the course. Alternatively, you can use this link: [http://mlzoomcamp.com/#syllabus](http://mlzoomcamp.com/#syllabus).',\n",
" 'answer_orig': 'Welcome to the course! Go to the course page (http://mlzoomcamp.com/), scroll down and start going through the course materials. Then read everything in the cohort folder for your cohort’s year.\\nClick on the links and start watching the videos. Also watch office hours from previous cohorts. Go to DTC youtube channel and click on Playlists and search for {course yyyy}. ML Zoomcamp was first launched in 2021.\\nOr you can just use this link: http://mlzoomcamp.com/#syllabus',\n",
" 'document': '0a278fb2',\n",
" 'question': 'How can I view the content of the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"You can find the materials related to your cohort by going to the course page at http://mlzoomcamp.com/, scrolling down, and accessing the cohort folder for your specific cohort's year. Make sure to read everything in that folder and click on the links to start watching the videos. Additionally, you can watch office hours from previous cohorts on the DTC YouTube channel by searching for your course year in the Playlists. You can also use this direct link to access the syllabus: http://mlzoomcamp.com/#syllabus.\",\n",
" 'answer_orig': 'Welcome to the course! Go to the course page (http://mlzoomcamp.com/), scroll down and start going through the course materials. Then read everything in the cohort folder for your cohort’s year.\\nClick on the links and start watching the videos. Also watch office hours from previous cohorts. Go to DTC youtube channel and click on Playlists and search for {course yyyy}. ML Zoomcamp was first launched in 2021.\\nOr you can just use this link: http://mlzoomcamp.com/#syllabus',\n",
" 'document': '0a278fb2',\n",
" 'question': 'Where can I find the materials related to my cohort?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To access the videos from previous office hours, go to the DTC YouTube channel, click on Playlists, and search for the relevant course year.',\n",
" 'answer_orig': 'Welcome to the course! Go to the course page (http://mlzoomcamp.com/), scroll down and start going through the course materials. Then read everything in the cohort folder for your cohort’s year.\\nClick on the links and start watching the videos. Also watch office hours from previous cohorts. Go to DTC youtube channel and click on Playlists and search for {course yyyy}. ML Zoomcamp was first launched in 2021.\\nOr you can just use this link: http://mlzoomcamp.com/#syllabus',\n",
" 'document': '0a278fb2',\n",
" 'question': 'How do I access the videos from previous office hours?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'ML Zoomcamp was first launched in 2021.',\n",
" 'answer_orig': 'Welcome to the course! Go to the course page (http://mlzoomcamp.com/), scroll down and start going through the course materials. Then read everything in the cohort folder for your cohort’s year.\\nClick on the links and start watching the videos. Also watch office hours from previous cohorts. Go to DTC youtube channel and click on Playlists and search for {course yyyy}. ML Zoomcamp was first launched in 2021.\\nOr you can just use this link: http://mlzoomcamp.com/#syllabus',\n",
" 'document': '0a278fb2',\n",
" 'question': 'When was ML Zoomcamp first introduced?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find the deadlines for the 2023 cohort by visiting the 2023 cohort page, where the deadlines are listed.',\n",
" 'answer_orig': 'For the 2023 cohort, you can see the deadlines here (it’s taken from the 2023 cohort page)',\n",
" 'document': '8de4fefd',\n",
" 'question': 'Where can I find the deadlines for the 2023 cohort?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The deadline information for the 2023 cohort is available by visiting the designated page (it’s taken from the 2023 cohort page).',\n",
" 'answer_orig': 'For the 2023 cohort, you can see the deadlines here (it’s taken from the 2023 cohort page)',\n",
" 'document': '8de4fefd',\n",
" 'question': 'Where is the deadline information available for the 2023 cohort?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To check the deadlines for the 2023 course, you can see them on the 2023 cohort page.',\n",
" 'answer_orig': 'For the 2023 cohort, you can see the deadlines here (it’s taken from the 2023 cohort page)',\n",
" 'document': '8de4fefd',\n",
" 'question': 'How do I check the deadlines for the 2023 course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can see the deadlines for the 2023 cohort by referring to the 2023 cohort page.',\n",
" 'answer_orig': 'For the 2023 cohort, you can see the deadlines here (it’s taken from the 2023 cohort page)',\n",
" 'document': '8de4fefd',\n",
" 'question': 'Can you show me where to see the 2023 cohort deadlines?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The deadlines for the 2023 cohort are listed on the 2023 cohort page. You can see them [here](it’s taken from the 2023 cohort page).',\n",
" 'answer_orig': 'For the 2023 cohort, you can see the deadlines here (it’s taken from the 2023 cohort page)',\n",
" 'document': '8de4fefd',\n",
" 'question': 'Where are the 2023 cohort deadlines listed?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The main changes between the 2022 and 2023 versions of the course are that there was one special module (BentoML) included in the previous iteration, while the rest of the modules remain the same. Additionally, the homework for the 2023 version is different from that of 2022.',\n",
" 'answer_orig': 'There’s not much difference. There was one special module (BentoML) in the previous iteration of the course, but the rest of the modules are the same as in 2022. The homework this year is different.',\n",
" 'document': '94e86808',\n",
" 'question': 'What has changed between the 2022 and 2023 versions of the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there is a module missing in the 2023 course compared to 2022. The special module (BentoML) that was part of the previous iteration of the course in 2022 is not included in the 2023 course. However, the rest of the modules are the same as in 2022.',\n",
" 'answer_orig': 'There’s not much difference. There was one special module (BentoML) in the previous iteration of the course, but the rest of the modules are the same as in 2022. The homework this year is different.',\n",
" 'document': '94e86808',\n",
" 'question': 'Is there any module missing in the 2023 course compared to 2022?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, the homework assignments in the 2023 course are different from those in 2022.',\n",
" 'answer_orig': 'There’s not much difference. There was one special module (BentoML) in the previous iteration of the course, but the rest of the modules are the same as in 2022. The homework this year is different.',\n",
" 'document': '94e86808',\n",
" 'question': 'Are the homework assignments in the 2023 course the same as in 2022?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, BentoML was included in the 2022 course but was a special module that is not part of the 2023 course iteration.',\n",
" 'answer_orig': 'There’s not much difference. There was one special module (BentoML) in the previous iteration of the course, but the rest of the modules are the same as in 2022. The homework this year is different.',\n",
" 'document': '94e86808',\n",
" 'question': 'Was BentoML included in the 2022 course but not in the 2023 course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, the majority of modules are unchanged from the 2022 course. There was one special module (BentoML) in the previous iteration, but the rest of the modules remain the same as in 2022.',\n",
" 'answer_orig': 'There’s not much difference. There was one special module (BentoML) in the previous iteration of the course, but the rest of the modules are the same as in 2022. The homework this year is different.',\n",
" 'document': '94e86808',\n",
" 'question': 'Are the majority of modules unchanged from the 2022 course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"We won’t be releasing new course videos; we will be using the videos from 2021. The focus of the course and the skills we want to teach have remained the same, and the videos are still up-to-date. If you haven't taken part in the previous iteration, you can start watching the videos, and it will still be useful for your learning. However, it's recommended to use Python 3.10 instead of Python 3.8.\",\n",
" 'answer_orig': 'We won’t re-record the course videos. The focus of the course and the skills we want to teach remained the same, and the videos are still up-to-date.\\nIf you haven’t taken part in the previous iteration, you can start watching the videos. It’ll be useful for you and you will learn new things. However, we recommend using Python 3.10 now instead of Python 3.8.',\n",
" 'document': 'e7ba6b8a',\n",
" 'question': 'Will you be releasing new course videos or using the ones from 2021?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, the course videos are not being re-recorded for this iteration. The videos from the previous iteration are still up-to-date and will be used.',\n",
" 'answer_orig': 'We won’t re-record the course videos. The focus of the course and the skills we want to teach remained the same, and the videos are still up-to-date.\\nIf you haven’t taken part in the previous iteration, you can start watching the videos. It’ll be useful for you and you will learn new things. However, we recommend using Python 3.10 now instead of Python 3.8.',\n",
" 'document': 'e7ba6b8a',\n",
" 'question': 'Are the course videos being re-recorded for this iteration?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'There is no difference in the skills taught in this iteration compared to 2021. The focus of the course and the skills intended to be taught have remained the same.',\n",
" 'answer_orig': 'We won’t re-record the course videos. The focus of the course and the skills we want to teach remained the same, and the videos are still up-to-date.\\nIf you haven’t taken part in the previous iteration, you can start watching the videos. It’ll be useful for you and you will learn new things. However, we recommend using Python 3.10 now instead of Python 3.8.',\n",
" 'document': 'e7ba6b8a',\n",
" 'question': 'Is there any difference in the skills taught in this iteration compared to 2021?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, you should watch the videos even if you didn't take the course in 2021. The videos are still up-to-date, and you will find them useful for learning new things. However, it's recommended to use Python 3.10 instead of Python 3.8.\",\n",
" 'answer_orig': 'We won’t re-record the course videos. The focus of the course and the skills we want to teach remained the same, and the videos are still up-to-date.\\nIf you haven’t taken part in the previous iteration, you can start watching the videos. It’ll be useful for you and you will learn new things. However, we recommend using Python 3.10 now instead of Python 3.8.',\n",
" 'document': 'e7ba6b8a',\n",
" 'question': \"Should I watch the videos if I didn't take the course in 2021?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The recommended Python version for this course iteration is Python 3.10.',\n",
" 'answer_orig': 'We won’t re-record the course videos. The focus of the course and the skills we want to teach remained the same, and the videos are still up-to-date.\\nIf you haven’t taken part in the previous iteration, you can start watching the videos. It’ll be useful for you and you will learn new things. However, we recommend using Python 3.10 now instead of Python 3.8.',\n",
" 'document': 'e7ba6b8a',\n",
" 'question': 'Which Python version is recommended for this course iteration?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'When posting about what you learned from the course on your social media pages, use the tag **#mlzoomcamp**.',\n",
" 'answer_orig': 'When you post about what you learned from the course on your social media pages, use the tag #mlzoomcamp. When you submit your homework, there’s a section in the form for putting the links there. Separate multiple links by any whitespace character (linebreak, space, tab, etc).\\nFor posting the learning in public links, you get extra scores. But the number of scores is limited to 7 points: if you put more than 7 links in your homework form, you’ll get only 7 points.\\nThe same content can be posted to 7 different social sites and still earn you 7 points if you add 7 URLs per week, see Alexey’s reply. (~ ellacharmed)\\nFor midterms/capstones, the awarded points are doubled as the duration is longer. So for projects the points are capped at 14 for 14 URLs.',\n",
" 'document': 'f7bc2f65',\n",
" 'question': 'What tag should I use when posting about my course learning on social media?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To submit your learning in public links when turning in homework, you should include the URLs of your social media posts in the designated section of the homework submission form. You should use the tag #mlzoomcamp when posting about what you learned. If you have multiple links, separate them using any whitespace character such as line breaks, spaces, or tabs. Keep in mind that you can earn a maximum of 7 points for this section, regardless of the number of links you submit; submitting more than 7 links will still cap your score at 7 points.',\n",
" 'answer_orig': 'When you post about what you learned from the course on your social media pages, use the tag #mlzoomcamp. When you submit your homework, there’s a section in the form for putting the links there. Separate multiple links by any whitespace character (linebreak, space, tab, etc).\\nFor posting the learning in public links, you get extra scores. But the number of scores is limited to 7 points: if you put more than 7 links in your homework form, you’ll get only 7 points.\\nThe same content can be posted to 7 different social sites and still earn you 7 points if you add 7 URLs per week, see Alexey’s reply. (~ ellacharmed)\\nFor midterms/capstones, the awarded points are doubled as the duration is longer. So for projects the points are capped at 14 for 14 URLs.',\n",
" 'document': 'f7bc2f65',\n",
" 'question': 'How should I submit my learning in public links when turning in homework?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, it is not possible to earn more than 7 points for posting learning links in weekly homework. Regardless of the number of links posted, you can only receive a maximum of 7 points.',\n",
" 'answer_orig': 'When you post about what you learned from the course on your social media pages, use the tag #mlzoomcamp. When you submit your homework, there’s a section in the form for putting the links there. Separate multiple links by any whitespace character (linebreak, space, tab, etc).\\nFor posting the learning in public links, you get extra scores. But the number of scores is limited to 7 points: if you put more than 7 links in your homework form, you’ll get only 7 points.\\nThe same content can be posted to 7 different social sites and still earn you 7 points if you add 7 URLs per week, see Alexey’s reply. (~ ellacharmed)\\nFor midterms/capstones, the awarded points are doubled as the duration is longer. So for projects the points are capped at 14 for 14 URLs.',\n",
" 'document': 'f7bc2f65',\n",
" 'question': 'Is it possible to earn more than 7 points for posting learning links in weekly homework?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can post the same content on multiple social sites to earn points for your homework. You can earn up to 7 points if you add 7 different URLs for your posts, even if the content is the same.',\n",
" 'answer_orig': 'When you post about what you learned from the course on your social media pages, use the tag #mlzoomcamp. When you submit your homework, there’s a section in the form for putting the links there. Separate multiple links by any whitespace character (linebreak, space, tab, etc).\\nFor posting the learning in public links, you get extra scores. But the number of scores is limited to 7 points: if you put more than 7 links in your homework form, you’ll get only 7 points.\\nThe same content can be posted to 7 different social sites and still earn you 7 points if you add 7 URLs per week, see Alexey’s reply. (~ ellacharmed)\\nFor midterms/capstones, the awarded points are doubled as the duration is longer. So for projects the points are capped at 14 for 14 URLs.',\n",
" 'document': 'f7bc2f65',\n",
" 'question': 'Can I post the same content on multiple social sites to earn points for my homework?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can earn up to 14 points for posting public learning links during midterms and capstones. This is because the points awarded for learning in public links are doubled during this period, allowing you to submit 14 URLs for the projects.',\n",
" 'answer_orig': 'When you post about what you learned from the course on your social media pages, use the tag #mlzoomcamp. When you submit your homework, there’s a section in the form for putting the links there. Separate multiple links by any whitespace character (linebreak, space, tab, etc).\\nFor posting the learning in public links, you get extra scores. But the number of scores is limited to 7 points: if you put more than 7 links in your homework form, you’ll get only 7 points.\\nThe same content can be posted to 7 different social sites and still earn you 7 points if you add 7 URLs per week, see Alexey’s reply. (~ ellacharmed)\\nFor midterms/capstones, the awarded points are doubled as the duration is longer. So for projects the points are capped at 14 for 14 URLs.',\n",
" 'document': 'f7bc2f65',\n",
" 'question': 'How many points can I earn for posting public learning links during midterms and capstones?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Based on the provided context, it does not explicitly mention whether you can share your course projects on a public platform. However, it does state that you can create your own GitHub repository for the course, where you can include your notes, homework, and projects. This implies that sharing your projects in your own repository is permitted, but the context does not provide clear guidance on sharing them publicly beyond that.',\n",
" 'answer_orig': \"You can create your own github repository for the course with your notes, homework, projects, etc.\\nThen fork the original course repo and add a link under the 'Community Notes' section to the notes that are in your own repo.\\nAfter that's done, create a pull request to sync your fork with the original course repo.\\n(By Wesley Barreto)\",\n",
" 'document': 'ae52a907',\n",
" 'question': 'Can I share my course projects on a public platform?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To add your notes to the Community Notes section, you can create your own GitHub repository for the course that includes your notes, homework, and projects. Then, fork the original course repository and add a link to your own repository under the 'Community Notes' section. After that, create a pull request to sync your fork with the original course repository.\",\n",
" 'answer_orig': \"You can create your own github repository for the course with your notes, homework, projects, etc.\\nThen fork the original course repo and add a link under the 'Community Notes' section to the notes that are in your own repo.\\nAfter that's done, create a pull request to sync your fork with the original course repo.\\n(By Wesley Barreto)\",\n",
" 'document': 'ae52a907',\n",
" 'question': 'How do I add my notes to the Community Notes section?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To sync your changes with the original course repo, first, create your own GitHub repository for the course with your notes, homework, projects, etc. Then, fork the original course repo and add a link under the 'Community Notes' section to the notes in your own repo. Once that is completed, create a pull request to sync your fork with the original course repo.\",\n",
" 'answer_orig': \"You can create your own github repository for the course with your notes, homework, projects, etc.\\nThen fork the original course repo and add a link under the 'Community Notes' section to the notes that are in your own repo.\\nAfter that's done, create a pull request to sync your fork with the original course repo.\\n(By Wesley Barreto)\",\n",
" 'document': 'ae52a907',\n",
" 'question': 'What is the process to sync my changes with the original course repo?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"You should host your course-related notes and projects by creating your own GitHub repository for the course. Include your notes, homework, and projects in that repository. Then, fork the original course repository and add a link under the 'Community Notes' section to your own repository. After doing that, create a pull request to sync your fork with the original course repository.\",\n",
" 'answer_orig': \"You can create your own github repository for the course with your notes, homework, projects, etc.\\nThen fork the original course repo and add a link under the 'Community Notes' section to the notes that are in your own repo.\\nAfter that's done, create a pull request to sync your fork with the original course repo.\\n(By Wesley Barreto)\",\n",
" 'document': 'ae52a907',\n",
" 'question': 'Where should I host my course-related notes and projects?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, you can add a link to your own repo in the course repository under the 'Community Notes' section. First, create your own GitHub repository for the course and fork the original course repo. After that, you can add the link to your notes in your own repo and then create a pull request to sync your fork with the original course repo.\",\n",
" 'answer_orig': \"You can create your own github repository for the course with your notes, homework, projects, etc.\\nThen fork the original course repo and add a link under the 'Community Notes' section to the notes that are in your own repo.\\nAfter that's done, create a pull request to sync your fork with the original course repo.\\n(By Wesley Barreto)\",\n",
" 'document': 'ae52a907',\n",
" 'question': 'Is there a specific location to add a link to my own repo in the course repository?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find the leaderboard links for 2023 and 2022 at the following URLs:\\n\\n- 2023: [Leaderboard Link](https://docs.google.com/spreadsheets/d/e/2PACX-1vSNK_yGtELX1RJK1SSRl4xiUbD0XZMYS6uwHnybc7Mql-WMnMgO7hHSu59w-1cE7FeFZjkopbh684UE/pubhtml)\\n- 2022: [Leaderboard Link](https://docs.google.com/spreadsheets/d/e/2PACX-1vQzLGpva63gb2rIilFnpZMRSb-buyr5oGh8jmDtIb8DANo4n6hDalra_WRCl4EZwO1JvaC4UIS62n5h/pubhtml)',\n",
" 'answer_orig': \"Leaderboard Links:\\n2023 - https://docs.google.com/spreadsheets/d/e/2PACX-1vSNK_yGtELX1RJK1SSRl4xiUbD0XZMYS6uwHnybc7Mql-WMnMgO7hHSu59w-1cE7FeFZjkopbh684UE/pubhtml\\n2022 - https://docs.google.com/spreadsheets/d/e/2PACX-1vQzLGpva63gb2rIilFnpZMRSb-buyr5oGh8jmDtIb8DANo4n6hDalra_WRCl4EZwO1JvaC4UIS62n5h/pubhtml\\nPython Code:\\nfrom hashlib import sha1\\ndef compute_hash(email):\\nreturn sha1(email.lower().encode('utf-8')).hexdigest()\\nYou need to call the function as follows:\\nprint(compute_hash('YOUR_EMAIL_HERE'))\\nThe quotes are required to denote that your email is a string.\\n(By Wesley Barreto)\\nYou can also use this website directly by entering your email: http://www.sha1-online.com. Then, you just have to copy and paste your hashed email in the “research” bar of the leaderboard to get your scores.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'dab5a24a',\n",
" 'question': 'Where can I find the leaderboard links for 2023 and 2022?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To compute the hash for your email using Python, you can use the following code:\\n\\n```python\\nfrom hashlib import sha1\\n\\ndef compute_hash(email):\\n return sha1(email.lower().encode('utf-8')).hexdigest()\\n\\n# Example usage - replace 'YOUR_EMAIL_HERE' with your actual email address\\nprint(compute_hash('YOUR_EMAIL_HERE'))\\n```\\n\\nMake sure to replace 'YOUR_EMAIL_HERE' with your actual email address, and remember that the quotes are required to denote that your email is a string.\",\n",
" 'answer_orig': \"Leaderboard Links:\\n2023 - https://docs.google.com/spreadsheets/d/e/2PACX-1vSNK_yGtELX1RJK1SSRl4xiUbD0XZMYS6uwHnybc7Mql-WMnMgO7hHSu59w-1cE7FeFZjkopbh684UE/pubhtml\\n2022 - https://docs.google.com/spreadsheets/d/e/2PACX-1vQzLGpva63gb2rIilFnpZMRSb-buyr5oGh8jmDtIb8DANo4n6hDalra_WRCl4EZwO1JvaC4UIS62n5h/pubhtml\\nPython Code:\\nfrom hashlib import sha1\\ndef compute_hash(email):\\nreturn sha1(email.lower().encode('utf-8')).hexdigest()\\nYou need to call the function as follows:\\nprint(compute_hash('YOUR_EMAIL_HERE'))\\nThe quotes are required to denote that your email is a string.\\n(By Wesley Barreto)\\nYou can also use this website directly by entering your email: http://www.sha1-online.com. Then, you just have to copy and paste your hashed email in the “research” bar of the leaderboard to get your scores.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'dab5a24a',\n",
" 'question': 'How do I compute the hash for my email using Python?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, you do not need to format your email in any special way before computing the hash, other than ensuring it is in lowercase. The Python code provided will convert the email to lowercase automatically before computing the hash.',\n",
" 'answer_orig': \"Leaderboard Links:\\n2023 - https://docs.google.com/spreadsheets/d/e/2PACX-1vSNK_yGtELX1RJK1SSRl4xiUbD0XZMYS6uwHnybc7Mql-WMnMgO7hHSu59w-1cE7FeFZjkopbh684UE/pubhtml\\n2022 - https://docs.google.com/spreadsheets/d/e/2PACX-1vQzLGpva63gb2rIilFnpZMRSb-buyr5oGh8jmDtIb8DANo4n6hDalra_WRCl4EZwO1JvaC4UIS62n5h/pubhtml\\nPython Code:\\nfrom hashlib import sha1\\ndef compute_hash(email):\\nreturn sha1(email.lower().encode('utf-8')).hexdigest()\\nYou need to call the function as follows:\\nprint(compute_hash('YOUR_EMAIL_HERE'))\\nThe quotes are required to denote that your email is a string.\\n(By Wesley Barreto)\\nYou can also use this website directly by entering your email: http://www.sha1-online.com. Then, you just have to copy and paste your hashed email in the “research” bar of the leaderboard to get your scores.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'dab5a24a',\n",
" 'question': 'Do I need to format my email in any special way before computing the hash?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, you can use an online tool to compute your email's hash. You can enter your email on the website http://www.sha1-online.com to get the hashed value. After obtaining the hash, you can copy and paste it as needed.\",\n",
" 'answer_orig': \"Leaderboard Links:\\n2023 - https://docs.google.com/spreadsheets/d/e/2PACX-1vSNK_yGtELX1RJK1SSRl4xiUbD0XZMYS6uwHnybc7Mql-WMnMgO7hHSu59w-1cE7FeFZjkopbh684UE/pubhtml\\n2022 - https://docs.google.com/spreadsheets/d/e/2PACX-1vQzLGpva63gb2rIilFnpZMRSb-buyr5oGh8jmDtIb8DANo4n6hDalra_WRCl4EZwO1JvaC4UIS62n5h/pubhtml\\nPython Code:\\nfrom hashlib import sha1\\ndef compute_hash(email):\\nreturn sha1(email.lower().encode('utf-8')).hexdigest()\\nYou need to call the function as follows:\\nprint(compute_hash('YOUR_EMAIL_HERE'))\\nThe quotes are required to denote that your email is a string.\\n(By Wesley Barreto)\\nYou can also use this website directly by entering your email: http://www.sha1-online.com. Then, you just have to copy and paste your hashed email in the “research” bar of the leaderboard to get your scores.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'dab5a24a',\n",
" 'question': \"Is there an online tool I can use to compute my email's hash?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To use the hashed email to check your scores on the leaderboard, you need to follow these steps:\\n\\n1. Compute the hash of your email using the provided Python code:\\n ```python\\n from hashlib import sha1\\n def compute_hash(email):\\n return sha1(email.lower().encode(\\'utf-8\\')).hexdigest()\\n \\n # Example usage\\n email = \"YOUR_EMAIL_HERE\"\\n hashed_email = compute_hash(email)\\n print(\"Hashed Email (SHA-1):\", hashed_email)\\n ```\\n Make sure to replace `\"YOUR_EMAIL_HERE\"` with your actual email address.\\n\\n2. Alternatively, you can use the website http://www.sha1-online.com to compute your hashed email.\\n\\n3. After obtaining your hashed email, go to the leaderboard link provided for the corresponding year (for example, 2023:\\n [Leaderboard 2023](https://docs.google.com/spreadsheets/d/e/2PACX-1vSNK_yGtELX1RJK1SSRl4xiUbD0XZMYS6uwHnybc7Mql-WMnMgO7hHSu59w-1cE7FeFZjkopbh684UE/pubhtml)).\\n\\n4. Finally, copy and paste your hashed email into the “research” bar of the leaderboard to find your scores.',\n",
" 'answer_orig': \"Leaderboard Links:\\n2023 - https://docs.google.com/spreadsheets/d/e/2PACX-1vSNK_yGtELX1RJK1SSRl4xiUbD0XZMYS6uwHnybc7Mql-WMnMgO7hHSu59w-1cE7FeFZjkopbh684UE/pubhtml\\n2022 - https://docs.google.com/spreadsheets/d/e/2PACX-1vQzLGpva63gb2rIilFnpZMRSb-buyr5oGh8jmDtIb8DANo4n6hDalra_WRCl4EZwO1JvaC4UIS62n5h/pubhtml\\nPython Code:\\nfrom hashlib import sha1\\ndef compute_hash(email):\\nreturn sha1(email.lower().encode('utf-8')).hexdigest()\\nYou need to call the function as follows:\\nprint(compute_hash('YOUR_EMAIL_HERE'))\\nThe quotes are required to denote that your email is a string.\\n(By Wesley Barreto)\\nYou can also use this website directly by entering your email: http://www.sha1-online.com. Then, you just have to copy and paste your hashed email in the “research” bar of the leaderboard to get your scores.\\n(Mélanie Fouesnard)\",\n",
" 'document': 'dab5a24a',\n",
" 'question': 'How do I use the hashed email to check my scores on the leaderboard?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To resolve the error 'wget is not recognized as an internal or external command' on a Windows system, you need to install wget. The easiest way to do this is by using Chocolatey; you can run the command `choco install wget`. Alternatively, you can download a binary from a source and place it in any location that is part of your PATH, such as `C:/tools/`.\",\n",
" 'answer_orig': 'If you get “wget is not recognized as an internal or external command”, you need to install it.\\nOn Ubuntu, run\\nsudo apt-get install wget\\nOn Windows, the easiest way to install wget is to use Chocolatey:\\nchoco install wget\\nOr you can download a binary from here and put it to any location in your PATH (e.g. C:/tools/)\\nOn Mac, the easiest way to install wget is to use brew.\\nBrew install wget\\nAlternatively, you can use a Python wget library, but instead of simply using “wget” you’ll need eeeto use\\npython -m wget\\nYou need to install it with pip first:\\npip install wget\\nAnd then in your python code, for example in your jupyter notebook, use:\\nimport wget\\nwget.download(\"URL\")\\nThis should download whatever is at the URL in the same directory as your code.\\n(Memoona Tahira)\\nAlternatively, you can read a CSV file from a URL directly with pandas:\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\ndf = pd.read_csv(url)\\nValid URL schemes include http, ftp, s3, gs, and file.\\nIn some cases you might need to bypass https checks:\\nimport ssl\\nssl._create_default_https_context = ssl._create_unverified_context\\nOr you can use the built-in Python functionality for downloading the files:\\nimport urllib.request\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\nurllib.request.urlretrieve(url, \"housing.csv\")\\nUrllib.request.urlretrieve() is a standard Python library function available on all devices and platforms. URL requests and URL data retrieval are done with the urllib.request module.\\nThe urlretrieve() function allows you to download files from URLs and save them locally. Python programs use it to download files from the internet.\\nOn any Python-enabled device or platform, you can use the urllib.request.urlretrieve() function to download the file.\\n(Mohammad Emad Sharifi)',\n",
" 'document': '49f9bda9',\n",
" 'question': \"How can I resolve 'wget is not recognized as an internal or external command' on a Windows system?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To install wget on a Mac, follow these steps:\\n\\n1. Open your terminal.\\n2. Use Homebrew to install wget by running the command:\\n ```\\n brew install wget\\n ```\\n\\nAlternatively, you can use a Python wget library by first installing it with pip:\\n1. Install the library by running:\\n ```\\n pip install wget\\n ```\\n2. In your Python code, use it by writing:\\n ```python\\n import wget\\n wget.download(\"URL\")\\n ```\\n\\nThis will download the file from the specified URL.',\n",
" 'answer_orig': 'If you get “wget is not recognized as an internal or external command”, you need to install it.\\nOn Ubuntu, run\\nsudo apt-get install wget\\nOn Windows, the easiest way to install wget is to use Chocolatey:\\nchoco install wget\\nOr you can download a binary from here and put it to any location in your PATH (e.g. C:/tools/)\\nOn Mac, the easiest way to install wget is to use brew.\\nBrew install wget\\nAlternatively, you can use a Python wget library, but instead of simply using “wget” you’ll need eeeto use\\npython -m wget\\nYou need to install it with pip first:\\npip install wget\\nAnd then in your python code, for example in your jupyter notebook, use:\\nimport wget\\nwget.download(\"URL\")\\nThis should download whatever is at the URL in the same directory as your code.\\n(Memoona Tahira)\\nAlternatively, you can read a CSV file from a URL directly with pandas:\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\ndf = pd.read_csv(url)\\nValid URL schemes include http, ftp, s3, gs, and file.\\nIn some cases you might need to bypass https checks:\\nimport ssl\\nssl._create_default_https_context = ssl._create_unverified_context\\nOr you can use the built-in Python functionality for downloading the files:\\nimport urllib.request\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\nurllib.request.urlretrieve(url, \"housing.csv\")\\nUrllib.request.urlretrieve() is a standard Python library function available on all devices and platforms. URL requests and URL data retrieval are done with the urllib.request module.\\nThe urlretrieve() function allows you to download files from URLs and save them locally. Python programs use it to download files from the internet.\\nOn any Python-enabled device or platform, you can use the urllib.request.urlretrieve() function to download the file.\\n(Mohammad Emad Sharifi)',\n",
" 'document': '49f9bda9',\n",
" 'question': 'What are the steps to install wget on a Mac?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can use Python to perform the same task as wget. You can utilize the `wget` library in Python by first installing it with pip:\\n\\n```bash\\npip install wget\\n```\\n\\nThen, in your Python code, you can download a file using:\\n\\n```python\\nimport wget\\nwget.download(\"URL\")\\n```\\n\\nThis will download the file located at the specified URL to the same directory as your code.\\n\\nAlternatively, you can use the built-in `urllib` library as follows:\\n\\n```python\\nimport urllib.request\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\nurllib.request.urlretrieve(url, \"housing.csv\")\\n```\\n\\nBoth methods will allow you to download files from the internet in a way similar to wget.',\n",
" 'answer_orig': 'If you get “wget is not recognized as an internal or external command”, you need to install it.\\nOn Ubuntu, run\\nsudo apt-get install wget\\nOn Windows, the easiest way to install wget is to use Chocolatey:\\nchoco install wget\\nOr you can download a binary from here and put it to any location in your PATH (e.g. C:/tools/)\\nOn Mac, the easiest way to install wget is to use brew.\\nBrew install wget\\nAlternatively, you can use a Python wget library, but instead of simply using “wget” you’ll need eeeto use\\npython -m wget\\nYou need to install it with pip first:\\npip install wget\\nAnd then in your python code, for example in your jupyter notebook, use:\\nimport wget\\nwget.download(\"URL\")\\nThis should download whatever is at the URL in the same directory as your code.\\n(Memoona Tahira)\\nAlternatively, you can read a CSV file from a URL directly with pandas:\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\ndf = pd.read_csv(url)\\nValid URL schemes include http, ftp, s3, gs, and file.\\nIn some cases you might need to bypass https checks:\\nimport ssl\\nssl._create_default_https_context = ssl._create_unverified_context\\nOr you can use the built-in Python functionality for downloading the files:\\nimport urllib.request\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\nurllib.request.urlretrieve(url, \"housing.csv\")\\nUrllib.request.urlretrieve() is a standard Python library function available on all devices and platforms. URL requests and URL data retrieval are done with the urllib.request module.\\nThe urlretrieve() function allows you to download files from URLs and save them locally. Python programs use it to download files from the internet.\\nOn any Python-enabled device or platform, you can use the urllib.request.urlretrieve() function to download the file.\\n(Mohammad Emad Sharifi)',\n",
" 'document': '49f9bda9',\n",
" 'question': 'Can I use Python to perform the same task as wget? If so, how?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can use the function `urllib.request.urlretrieve()` to download files from URLs in Python. This function allows you to retrieve files from the internet and save them locally. Here\\'s an example of how to use it:\\n\\n```python\\nimport urllib.request\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\nurllib.request.urlretrieve(url, \"housing.csv\")\\n``` \\n\\nThis will download the file from the specified URL and save it as \"housing.csv\" in your local directory.',\n",
" 'answer_orig': 'If you get “wget is not recognized as an internal or external command”, you need to install it.\\nOn Ubuntu, run\\nsudo apt-get install wget\\nOn Windows, the easiest way to install wget is to use Chocolatey:\\nchoco install wget\\nOr you can download a binary from here and put it to any location in your PATH (e.g. C:/tools/)\\nOn Mac, the easiest way to install wget is to use brew.\\nBrew install wget\\nAlternatively, you can use a Python wget library, but instead of simply using “wget” you’ll need eeeto use\\npython -m wget\\nYou need to install it with pip first:\\npip install wget\\nAnd then in your python code, for example in your jupyter notebook, use:\\nimport wget\\nwget.download(\"URL\")\\nThis should download whatever is at the URL in the same directory as your code.\\n(Memoona Tahira)\\nAlternatively, you can read a CSV file from a URL directly with pandas:\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\ndf = pd.read_csv(url)\\nValid URL schemes include http, ftp, s3, gs, and file.\\nIn some cases you might need to bypass https checks:\\nimport ssl\\nssl._create_default_https_context = ssl._create_unverified_context\\nOr you can use the built-in Python functionality for downloading the files:\\nimport urllib.request\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\nurllib.request.urlretrieve(url, \"housing.csv\")\\nUrllib.request.urlretrieve() is a standard Python library function available on all devices and platforms. URL requests and URL data retrieval are done with the urllib.request module.\\nThe urlretrieve() function allows you to download files from URLs and save them locally. Python programs use it to download files from the internet.\\nOn any Python-enabled device or platform, you can use the urllib.request.urlretrieve() function to download the file.\\n(Mohammad Emad Sharifi)',\n",
" 'document': '49f9bda9',\n",
" 'question': \"What function in Python's urllib.request can I use to download files from URLs?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can read a CSV file directly from a URL using pandas with the following code:\\n\\n```python\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\ndf = pd.read_csv(url)\\n```\\n\\nMake sure that the URL is valid; supported URL schemes include http, ftp, s3, gs, and file. If needed, you might have to bypass https checks using:\\n\\n```python\\nimport ssl\\nssl._create_default_https_context = ssl._create_unverified_context\\n```\\n\\nAlternatively, you can use the built-in Python functionality with `urllib.request`:\\n\\n```python\\nimport urllib.request\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\nurllib.request.urlretrieve(url, \"housing.csv\")\\n```\\n\\nThis will allow you to download files from URLs and save them locally.',\n",
" 'answer_orig': 'If you get “wget is not recognized as an internal or external command”, you need to install it.\\nOn Ubuntu, run\\nsudo apt-get install wget\\nOn Windows, the easiest way to install wget is to use Chocolatey:\\nchoco install wget\\nOr you can download a binary from here and put it to any location in your PATH (e.g. C:/tools/)\\nOn Mac, the easiest way to install wget is to use brew.\\nBrew install wget\\nAlternatively, you can use a Python wget library, but instead of simply using “wget” you’ll need eeeto use\\npython -m wget\\nYou need to install it with pip first:\\npip install wget\\nAnd then in your python code, for example in your jupyter notebook, use:\\nimport wget\\nwget.download(\"URL\")\\nThis should download whatever is at the URL in the same directory as your code.\\n(Memoona Tahira)\\nAlternatively, you can read a CSV file from a URL directly with pandas:\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\ndf = pd.read_csv(url)\\nValid URL schemes include http, ftp, s3, gs, and file.\\nIn some cases you might need to bypass https checks:\\nimport ssl\\nssl._create_default_https_context = ssl._create_unverified_context\\nOr you can use the built-in Python functionality for downloading the files:\\nimport urllib.request\\nurl = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\nurllib.request.urlretrieve(url, \"housing.csv\")\\nUrllib.request.urlretrieve() is a standard Python library function available on all devices and platforms. URL requests and URL data retrieval are done with the urllib.request module.\\nThe urlretrieve() function allows you to download files from URLs and save them locally. Python programs use it to download files from the internet.\\nOn any Python-enabled device or platform, you can use the urllib.request.urlretrieve() function to download the file.\\n(Mohammad Emad Sharifi)',\n",
" 'document': '49f9bda9',\n",
" 'question': 'How can I read a CSV file directly from a URL using pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To download a CSV file inside a Jupyter notebook, you can use the following methods:\\n\\n1. **Using wget**:\\n ```python\\n !wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n ```\\n (The exclamation mark `!` allows you to run shell commands in your notebook.)\\n\\n2. **Using the pandas library**:\\n You can read a CSV file directly from a URL with pandas:\\n ```python\\n import pandas as pd\\n url = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\n df = pd.read_csv(url)\\n ```\\n\\n3. **Using urllib**:\\n You can also download the file using the built-in Python `urllib` module:\\n ```python\\n import urllib.request\\n url = \"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\"\\n urllib.request.urlretrieve(url, \"housing.csv\")\\n ```\\n\\nAll these methods will allow you to download and access the CSV file within your Jupyter notebook environment.',\n",
" 'answer_orig': 'You can use\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nTo download the data too. The exclamation mark !, lets you execute shell commands inside your notebooks. This works generally for shell commands such as ls, cp, mkdir, mv etc . . .\\nFor instance, if you then want to move your data into a data directory alongside your notebook-containing directory, you could execute the following:\\n!mkdir -p ../data/\\n!mv housing.csv ../data/',\n",
" 'document': 'd44de7d1',\n",
" 'question': 'How can I download a CSV file inside a Jupyter notebook?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'In a Jupyter notebook, the exclamation mark (!) is used to run shell commands directly from the notebook. For example, if you use `!ls -lh`, it will execute the `ls -lh` command in the shell and display the output in the notebook. However, if you are using Windows and encounter an error with the `ls` command, you can use `!dir` to achieve similar functionality.',\n",
" 'answer_orig': 'You can use\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nTo download the data too. The exclamation mark !, lets you execute shell commands inside your notebooks. This works generally for shell commands such as ls, cp, mkdir, mv etc . . .\\nFor instance, if you then want to move your data into a data directory alongside your notebook-containing directory, you could execute the following:\\n!mkdir -p ../data/\\n!mv housing.csv ../data/',\n",
" 'document': 'd44de7d1',\n",
" 'question': 'What does the exclamation mark (!) do in a Jupyter notebook?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To execute shell commands like `ls`, `cp`, and `mkdir` in a Jupyter notebook, you can use the `!` operator followed by the command you want to run. \\n\\nFor example:\\n- To list files in a directory, use: `!ls` (on Unix systems) or `!dir` (on Windows systems).\\n- To copy files, use: `!cp <source> <destination>` (on Unix systems) or the equivalent command for Windows.\\n- To create a directory, use: `!mkdir <directory_name>`.\\n\\nRemember that the availability of specific commands may vary based on your operating system. If you're using Windows and encounter errors like `'ls' is not recognized as an internal or external command`, switch to using `!dir` instead.\",\n",
" 'answer_orig': 'You can use\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nTo download the data too. The exclamation mark !, lets you execute shell commands inside your notebooks. This works generally for shell commands such as ls, cp, mkdir, mv etc . . .\\nFor instance, if you then want to move your data into a data directory alongside your notebook-containing directory, you could execute the following:\\n!mkdir -p ../data/\\n!mv housing.csv ../data/',\n",
" 'document': 'd44de7d1',\n",
" 'question': 'How can I execute shell commands like ls, cp, and mkdir in a Jupyter notebook?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To create a directory named 'data', use the command:\\n\\n```\\n!mkdir -p ../data/\\n```\",\n",
" 'answer_orig': 'You can use\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nTo download the data too. The exclamation mark !, lets you execute shell commands inside your notebooks. This works generally for shell commands such as ls, cp, mkdir, mv etc . . .\\nFor instance, if you then want to move your data into a data directory alongside your notebook-containing directory, you could execute the following:\\n!mkdir -p ../data/\\n!mv housing.csv ../data/',\n",
" 'document': 'd44de7d1',\n",
" 'question': \"What command do I use to create a directory named 'data'?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To move a file into a different directory within a Jupyter notebook, you can use the `!mv` command. For example, if you want to move a file named `housing.csv` into a directory called `data`, you can execute the following commands in your notebook:\\n\\n1. First, create the directory if it doesn't exist:\\n ```\\n !mkdir -p ../data/\\n ```\\n\\n2. Then, move the file to that directory:\\n ```\\n !mv housing.csv ../data/\\n ```\\n\\nThis will relocate `housing.csv` into the `data` directory.\",\n",
" 'answer_orig': 'You can use\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nTo download the data too. The exclamation mark !, lets you execute shell commands inside your notebooks. This works generally for shell commands such as ls, cp, mkdir, mv etc . . .\\nFor instance, if you then want to move your data into a data directory alongside your notebook-containing directory, you could execute the following:\\n!mkdir -p ../data/\\n!mv housing.csv ../data/',\n",
" 'document': 'd44de7d1',\n",
" 'question': 'How do I move a file into a different directory within a Jupyter notebook?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To set up a WSL development environment on a Windows 11 device, follow these steps:\\n\\n1. Use the Microsoft Learn link to set up a WSL development environment: [Set up a WSL development environment | Microsoft Learn](https://learn.microsoft.com/en-us/windows/wsl/install).\\n2. Ensure you have the “Virtual Machine Platform” feature activated in your Windows features. You can check this by searching for \"features\" in the research bar.\\n3. Install a Linux distribution such as ‘Ubuntu’ or ‘Debian’ from the Microsoft Store.\\n4. After installation, open the app and create a username and a secure password.\\n5. Once inside the Linux system, you can execute commands like `pwd` to verify you are in Linux.\\n6. To connect to VS Code, download the Microsoft verified VS Code extension ‘WSL’ which allows remote connections to your WSL Ubuntu instance similar to a virtual machine.\\n\\nMake sure your system supports virtualization in the BIOS settings as well.',\n",
" 'answer_orig': '(Tyler Simpson)',\n",
" 'document': '314ebe32',\n",
" 'question': 'How can I set up a WSL development environment on a Windows 11 device?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To connect your WSL Ubuntu instance to VS Code on Windows 11, follow these steps:\\n\\n1. Make sure you have WSL set up on your Windows 11 device. You can refer to the Microsoft Learn link for guidance: Set up a WSL development environment | Microsoft Learn.\\n\\n2. Download and install the Microsoft verified VS Code extension called 'WSL'. This extension allows you to remotely connect to your WSL Ubuntu instance as if it were a virtual machine. \\n\\nFollowing these steps will enable you to use VS Code seamlessly with your WSL Ubuntu environment.\",\n",
" 'answer_orig': '(Tyler Simpson)',\n",
" 'document': '314ebe32',\n",
" 'question': 'How do I connect my WSL Ubuntu instance to VS Code on Windows 11?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To connect VS Code to WSL, you should download the Microsoft verified VS Code extension ‘WSL’. This will allow you to remotely connect to your WSL Ubuntu instance.',\n",
" 'answer_orig': '(Tyler Simpson)',\n",
" 'document': '314ebe32',\n",
" 'question': 'Which extension should I download to connect VS Code to WSL?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can use VS Code to remotely connect to your WSL Ubuntu instance. To do this, you need to download the Microsoft verified VS Code extension ‘WSL’. This will allow you to connect to your WSL Ubuntu instance just as if it were a virtual machine. For more details, you can refer to the Microsoft Learn link on setting up a WSL development environment.',\n",
" 'answer_orig': '(Tyler Simpson)',\n",
" 'document': '314ebe32',\n",
" 'question': 'Can I use VS Code to remotely connect to my WSL Ubuntu instance?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there is a guide on Microsoft Learn for setting up WSL on Windows 11. You can find it at the link: Set up a WSL development environment | Microsoft Learn.',\n",
" 'answer_orig': '(Tyler Simpson)',\n",
" 'document': '314ebe32',\n",
" 'question': 'Is there a guide on Microsoft Learn for setting up WSL on Windows 11?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To fix the error message \\'src refspec master does not match any\\' when using git push, you can run the following commands:\\n\\n1. First, create an initial commit with the command:\\n ```\\n git commit -m \"initial commit\"\\n ```\\n2. Then, push your changes to the main branch using:\\n ```\\n git push origin main\\n ```\\n\\nIf this is your first time using GitHub, you might also find a helpful tutorial at this link: https://dennisivy.com/github-quickstart.',\n",
" 'answer_orig': 'This is my first time using Github to upload a code. I was getting the below error message when I type\\ngit push -u origin master:\\nerror: src refspec master does not match any\\nerror: failed to push some refs to \\'https://github.com/XXXXXX/1st-Homework.git\\'\\nSolution:\\nThe error message got fixed by running below commands:\\ngit commit -m \"initial commit\"\\ngit push origin main\\nIf this is your first time to use Github, you will find a great & straightforward tutorial in this link https://dennisivy.com/github-quickstart\\n(Asia Saeed)\\nYou can also use the “upload file” functionality from GitHub for that\\nIf you write your code on Google colab you can also directly share it on your Github.\\n(By Pranab Sarma)',\n",
" 'document': '98cff602',\n",
" 'question': \"How can I fix the error message 'src refspec master does not match any' when using git push?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find a great and straightforward tutorial on using GitHub for the first time at this link: [https://dennisivy.com/github-quickstart](https://dennisivy.com/github-quickstart).',\n",
" 'answer_orig': 'This is my first time using Github to upload a code. I was getting the below error message when I type\\ngit push -u origin master:\\nerror: src refspec master does not match any\\nerror: failed to push some refs to \\'https://github.com/XXXXXX/1st-Homework.git\\'\\nSolution:\\nThe error message got fixed by running below commands:\\ngit commit -m \"initial commit\"\\ngit push origin main\\nIf this is your first time to use Github, you will find a great & straightforward tutorial in this link https://dennisivy.com/github-quickstart\\n(Asia Saeed)\\nYou can also use the “upload file” functionality from GitHub for that\\nIf you write your code on Google colab you can also directly share it on your Github.\\n(By Pranab Sarma)',\n",
" 'document': '98cff602',\n",
" 'question': \"Where can I find a tutorial on using GitHub if it's my first time?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To push an initial commit to GitHub, follow these steps:\\n\\n1. Make sure you have made your changes and are ready to commit them.\\n2. Run the command to commit your changes:\\n ```\\n git commit -m \"initial commit\"\\n ```\\n3. Push your changes to GitHub using:\\n ```\\n git push origin main\\n ```\\n\\nIf you encounter the error message \"src refspec master does not match any,\" ensure you\\'re pushing to the correct branch name, which is typically \"main\" for new repositories. You can also refer to this [GitHub quickstart tutorial](https://dennisivy.com/github-quickstart) for further guidance.',\n",
" 'answer_orig': 'This is my first time using Github to upload a code. I was getting the below error message when I type\\ngit push -u origin master:\\nerror: src refspec master does not match any\\nerror: failed to push some refs to \\'https://github.com/XXXXXX/1st-Homework.git\\'\\nSolution:\\nThe error message got fixed by running below commands:\\ngit commit -m \"initial commit\"\\ngit push origin main\\nIf this is your first time to use Github, you will find a great & straightforward tutorial in this link https://dennisivy.com/github-quickstart\\n(Asia Saeed)\\nYou can also use the “upload file” functionality from GitHub for that\\nIf you write your code on Google colab you can also directly share it on your Github.\\n(By Pranab Sarma)',\n",
" 'document': '98cff602',\n",
" 'question': 'What are the steps to push an initial commit to GitHub?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can use the “upload file” functionality from GitHub to submit your homework.',\n",
" 'answer_orig': 'This is my first time using Github to upload a code. I was getting the below error message when I type\\ngit push -u origin master:\\nerror: src refspec master does not match any\\nerror: failed to push some refs to \\'https://github.com/XXXXXX/1st-Homework.git\\'\\nSolution:\\nThe error message got fixed by running below commands:\\ngit commit -m \"initial commit\"\\ngit push origin main\\nIf this is your first time to use Github, you will find a great & straightforward tutorial in this link https://dennisivy.com/github-quickstart\\n(Asia Saeed)\\nYou can also use the “upload file” functionality from GitHub for that\\nIf you write your code on Google colab you can also directly share it on your Github.\\n(By Pranab Sarma)',\n",
" 'document': '98cff602',\n",
" 'question': \"Can I use the 'upload file' functionality on GitHub to submit my homework?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, it is possible to directly share code from Google Colab to GitHub. You can do this by using the built-in functionality within Google Colab to save your notebooks directly to your GitHub repository.',\n",
" 'answer_orig': 'This is my first time using Github to upload a code. I was getting the below error message when I type\\ngit push -u origin master:\\nerror: src refspec master does not match any\\nerror: failed to push some refs to \\'https://github.com/XXXXXX/1st-Homework.git\\'\\nSolution:\\nThe error message got fixed by running below commands:\\ngit commit -m \"initial commit\"\\ngit push origin main\\nIf this is your first time to use Github, you will find a great & straightforward tutorial in this link https://dennisivy.com/github-quickstart\\n(Asia Saeed)\\nYou can also use the “upload file” functionality from GitHub for that\\nIf you write your code on Google colab you can also directly share it on your Github.\\n(By Pranab Sarma)',\n",
" 'document': '98cff602',\n",
" 'question': 'Is it possible to directly share code from Google Colab to GitHub?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'A singular matrix error occurs during matrix inversion because not every matrix is invertible. In the homework, this error often arises when there is an issue with the order of matrix multiplication. Since multiplication is not commutative, the order of the matrices involved in the operation (using the method .dot) must be respected; otherwise, it can lead to incorrect results and a singular matrix error.',\n",
" 'answer_orig': \"I'm trying to invert the matrix but I got error that the matrix is singular matrix\\nThe singular matrix error is caused by the fact that not every matrix can be inverted. In particular, in the homework it happens because you have to pay close attention when dealing with multiplication (the method .dot) since multiplication is not commutative! X.dot(Y) is not necessarily equal to Y.dot(X), so respect the order otherwise you get the wrong matrix.\",\n",
" 'document': '54ec0de4',\n",
" 'question': 'Why does a singular matrix error occur during matrix inversion in the homework?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To avoid getting a singular matrix error while using the .dot method in your homework, be sure to pay close attention to the order of your matrix multiplication. Remember that multiplication is not commutative, meaning that \\\\( X.dot(Y) \\\\) is not necessarily equal to \\\\( Y.dot(X) \\\\). Therefore, you need to respect the order in which you perform the multiplication to avoid errors. Additionally, ensure that the matrices you're multiplying are suitable for inversion, as not every matrix can be inverted.\",\n",
" 'answer_orig': \"I'm trying to invert the matrix but I got error that the matrix is singular matrix\\nThe singular matrix error is caused by the fact that not every matrix can be inverted. In particular, in the homework it happens because you have to pay close attention when dealing with multiplication (the method .dot) since multiplication is not commutative! X.dot(Y) is not necessarily equal to Y.dot(X), so respect the order otherwise you get the wrong matrix.\",\n",
" 'document': '54ec0de4',\n",
" 'question': 'How can I avoid getting a singular matrix error while using .dot method in the homework?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The primary cause of a singular matrix error in matrix multiplication tasks is that not every matrix can be inverted. This error often arises because the order of multiplication matters; specifically, \\\\( X.dot(Y) \\\\) is not necessarily equal to \\\\( Y.dot(X) \\\\). Failing to respect this order can lead to incorrect results and result in a singular matrix error.',\n",
" 'answer_orig': \"I'm trying to invert the matrix but I got error that the matrix is singular matrix\\nThe singular matrix error is caused by the fact that not every matrix can be inverted. In particular, in the homework it happens because you have to pay close attention when dealing with multiplication (the method .dot) since multiplication is not commutative! X.dot(Y) is not necessarily equal to Y.dot(X), so respect the order otherwise you get the wrong matrix.\",\n",
" 'document': '54ec0de4',\n",
" 'question': 'What is the primary cause of a singular matrix error in matrix multiplication tasks?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'In the homework, X.dot(Y) is not necessarily equal to Y.dot(X) because matrix multiplication is not commutative. The order of multiplying matrices matters; for a valid multiplication, the number of columns in the first matrix must match the number of rows in the second matrix. If this condition is not satisfied, or if the matrices are not conformable for multiplication, you may encounter a ValueError due to dimension mismatch.',\n",
" 'answer_orig': \"I'm trying to invert the matrix but I got error that the matrix is singular matrix\\nThe singular matrix error is caused by the fact that not every matrix can be inverted. In particular, in the homework it happens because you have to pay close attention when dealing with multiplication (the method .dot) since multiplication is not commutative! X.dot(Y) is not necessarily equal to Y.dot(X), so respect the order otherwise you get the wrong matrix.\",\n",
" 'document': '54ec0de4',\n",
" 'question': 'In the homework, why is X.dot(Y) not necessarily equal to Y.dot(X)?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To prevent singular matrix errors in the machine learning course, pay close attention to the following:\\n\\n1. **Matrix Multiplication Order**: Use the method `.dot()` for matrix multiplication and remember that multiplication is not commutative. This means that \\\\( X.dot(Y) \\\\) is not necessarily equal to \\\\( Y.dot(X) \\\\). Always respect the order of multiplication to avoid incorrect matrices.\\n\\n2. **Matrix Inversion**: Be cautious about inverting matrices in your code. If you calculate the inverse of a matrix \\\\( X \\\\) once, avoid doing it a second time without ensuring that the matrix has not changed in a way that makes it singular.\\n\\nBy focusing on these aspects, you can minimize the likelihood of encountering singular matrix errors.',\n",
" 'answer_orig': \"I'm trying to invert the matrix but I got error that the matrix is singular matrix\\nThe singular matrix error is caused by the fact that not every matrix can be inverted. In particular, in the homework it happens because you have to pay close attention when dealing with multiplication (the method .dot) since multiplication is not commutative! X.dot(Y) is not necessarily equal to Y.dot(X), so respect the order otherwise you get the wrong matrix.\",\n",
" 'document': '54ec0de4',\n",
" 'question': 'What should I pay close attention to in order to prevent singular matrix errors in the machine learning course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'If conda is not recognized as an internal command, you should check if you have Anaconda or Miniconda installed on your system. If not, you need to install one of them first. If you are on Windows and have just installed Anaconda, try using Anaconda’s own terminal called “Anaconda Prompt”.',\n",
" 'answer_orig': 'I have a problem with my terminal. Command\\nconda create -n ml-zoomcamp python=3.9\\ndoesn’t work. Any of 3.8/ 3.9 / 3.10 should be all fine\\nIf you’re on Windows and just installed Anaconda, you can use Anaconda’s own terminal called “Anaconda Prompt”.\\nIf you don’t have Anaconda or Miniconda, you should install it first\\n(Tatyana Mardvilko)',\n",
" 'document': 'f81f4ecb',\n",
" 'question': 'What should I do if conda is not recognized as an internal command?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, the command 'conda create -n ml-zoomcamp python=3.9' will work with Python 3.8. The context states that any of Python versions 3.8, 3.9, or 3.10 should be fine.\",\n",
" 'answer_orig': 'I have a problem with my terminal. Command\\nconda create -n ml-zoomcamp python=3.9\\ndoesn’t work. Any of 3.8/ 3.9 / 3.10 should be all fine\\nIf you’re on Windows and just installed Anaconda, you can use Anaconda’s own terminal called “Anaconda Prompt”.\\nIf you don’t have Anaconda or Miniconda, you should install it first\\n(Tatyana Mardvilko)',\n",
" 'document': 'f81f4ecb',\n",
" 'question': \"Will the command 'conda create -n ml-zoomcamp python=3.9' work with Python 3.8?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"It is not strictly necessary to use Anaconda's own terminal on Windows, but it is recommended. You can use Anaconda Prompt, which is provided as part of the Anaconda installation, to run commands like `conda create` and `conda activate`.\",\n",
" 'answer_orig': 'I have a problem with my terminal. Command\\nconda create -n ml-zoomcamp python=3.9\\ndoesn’t work. Any of 3.8/ 3.9 / 3.10 should be all fine\\nIf you’re on Windows and just installed Anaconda, you can use Anaconda’s own terminal called “Anaconda Prompt”.\\nIf you don’t have Anaconda or Miniconda, you should install it first\\n(Tatyana Mardvilko)',\n",
" 'document': 'f81f4ecb',\n",
" 'question': \"Is it necessary to use Anaconda's own terminal on Windows?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, you need to install Anaconda or Miniconda if you don't already have them.\",\n",
" 'answer_orig': 'I have a problem with my terminal. Command\\nconda create -n ml-zoomcamp python=3.9\\ndoesn’t work. Any of 3.8/ 3.9 / 3.10 should be all fine\\nIf you’re on Windows and just installed Anaconda, you can use Anaconda’s own terminal called “Anaconda Prompt”.\\nIf you don’t have Anaconda or Miniconda, you should install it first\\n(Tatyana Mardvilko)',\n",
" 'document': 'f81f4ecb',\n",
" 'question': \"Do I need to install Anaconda or Miniconda if I don't already have them?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, the specified Python version for the course does not need to be exactly 3.9. While the recommended version for the 2023 cohort is Python 3.10, using Python 3.8 is also mentioned, and the course materials are still relevant regardless of the slight version differences.',\n",
" 'answer_orig': 'I have a problem with my terminal. Command\\nconda create -n ml-zoomcamp python=3.9\\ndoesn’t work. Any of 3.8/ 3.9 / 3.10 should be all fine\\nIf you’re on Windows and just installed Anaconda, you can use Anaconda’s own terminal called “Anaconda Prompt”.\\nIf you don’t have Anaconda or Miniconda, you should install it first\\n(Tatyana Mardvilko)',\n",
" 'document': 'f81f4ecb',\n",
" 'question': 'Does the specified Python version for the course need to be exactly 3.9?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To read a dataset with Pandas in Windows, you may encounter issues if you use backslashes in the file path. In Python, the backslash (`\\\\`) is treated as an escape character, which can cause conflicts. To avoid this, you should add an \"r\" before the file path so that Python treats it as a literal string instead. \\n\\nHere’s how you should load the file:\\n\\n```python\\ndf = pd.read_csv(r\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\n```\\n\\nBy using `r` in front of the path, you can successfully read the dataset without running into errors related to escape sequences.',\n",
" 'answer_orig': 'How do I read the dataset with Pandas in Windows?\\nI used the code below but not working\\ndf = pd.read_csv(\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\nUnlike Linux/Mac OS, Windows uses the backslash (\\\\) to navigate the files that cause the conflict with Python. The problem with using the backslash is that in Python, the \\'\\\\\\' has a purpose known as an escape sequence. Escape sequences allow us to include special characters in strings, for example, \"\\\\n\" to add a new line or \"\\\\t\" to add spaces, etc. To avoid the issue we just need to add \"r\" before the file path and Python will treat it as a literal string (not an escape sequence).\\nHere’s how we should be loading the file instead:\\ndf = pd.read_csv(r\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\n(Muhammad Awon)',\n",
" 'document': 'be760b92',\n",
" 'question': \"How do I read a dataset with Pandas in Windows when my code doesn't work?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The code `df = pd.read_csv(\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')` does not work in Windows because the backslash (`\\\\`) used in the file path is interpreted by Python as an escape character, which can lead to conflicts. To avoid this issue, you should prepend the file path with an \"r\" to treat it as a raw string, like this: `df = pd.read_csv(r\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')`. This way, Python will correctly interpret the backslashes in the file path.',\n",
" 'answer_orig': 'How do I read the dataset with Pandas in Windows?\\nI used the code below but not working\\ndf = pd.read_csv(\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\nUnlike Linux/Mac OS, Windows uses the backslash (\\\\) to navigate the files that cause the conflict with Python. The problem with using the backslash is that in Python, the \\'\\\\\\' has a purpose known as an escape sequence. Escape sequences allow us to include special characters in strings, for example, \"\\\\n\" to add a new line or \"\\\\t\" to add spaces, etc. To avoid the issue we just need to add \"r\" before the file path and Python will treat it as a literal string (not an escape sequence).\\nHere’s how we should be loading the file instead:\\ndf = pd.read_csv(r\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\n(Muhammad Awon)',\n",
" 'document': 'be760b92',\n",
" 'question': \"Why does my code df = pd.read_csv('C:\\\\Users\\\\username\\\\Downloads\\\\data.csv') not work in Windows?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The reason behind the backslash causing an issue when reading files in Windows using Pandas is that Windows uses the backslash (\\\\) to navigate files, which conflicts with Python\\'s use of the backslash as an escape sequence. Escape sequences allow for the inclusion of special characters in strings, such as \"\\\\n\" for new lines or \"\\\\t\" for tabs. To avoid this issue, it is recommended to prefix the file path with \"r\" to treat it as a literal string, preventing Python from interpreting the backslashes as escape sequences.',\n",
" 'answer_orig': 'How do I read the dataset with Pandas in Windows?\\nI used the code below but not working\\ndf = pd.read_csv(\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\nUnlike Linux/Mac OS, Windows uses the backslash (\\\\) to navigate the files that cause the conflict with Python. The problem with using the backslash is that in Python, the \\'\\\\\\' has a purpose known as an escape sequence. Escape sequences allow us to include special characters in strings, for example, \"\\\\n\" to add a new line or \"\\\\t\" to add spaces, etc. To avoid the issue we just need to add \"r\" before the file path and Python will treat it as a literal string (not an escape sequence).\\nHere’s how we should be loading the file instead:\\ndf = pd.read_csv(r\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\n(Muhammad Awon)',\n",
" 'document': 'be760b92',\n",
" 'question': 'What is the reason behind backslash causing an issue when reading files in Windows using Pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Adding 'r' before the file path in Windows with Pandas indicates that the string is a raw string literal. This means that Python will treat the backslashes in the file path as literal characters rather than escape sequences. This is necessary because in Python, the backslash has a special meaning as an escape character, which can cause conflicts when specifying file paths. By using the raw string format, you can correctly read the file without encountering issues related to escape sequences.\",\n",
" 'answer_orig': 'How do I read the dataset with Pandas in Windows?\\nI used the code below but not working\\ndf = pd.read_csv(\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\nUnlike Linux/Mac OS, Windows uses the backslash (\\\\) to navigate the files that cause the conflict with Python. The problem with using the backslash is that in Python, the \\'\\\\\\' has a purpose known as an escape sequence. Escape sequences allow us to include special characters in strings, for example, \"\\\\n\" to add a new line or \"\\\\t\" to add spaces, etc. To avoid the issue we just need to add \"r\" before the file path and Python will treat it as a literal string (not an escape sequence).\\nHere’s how we should be loading the file instead:\\ndf = pd.read_csv(r\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\n(Muhammad Awon)',\n",
" 'document': 'be760b92',\n",
" 'question': \"What does adding 'r' before the file path do when reading a file in Windows with Pandas?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To read a file with Pandas in Windows, you should use the raw string format to avoid issues with the backslash in file paths. Here\\'s an example:\\n\\n```python\\ndf = pd.read_csv(r\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\n```\\n\\nBy adding \"r\" before the file path, Python treats it as a literal string, preventing any conflicts with escape sequences.',\n",
" 'answer_orig': 'How do I read the dataset with Pandas in Windows?\\nI used the code below but not working\\ndf = pd.read_csv(\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\nUnlike Linux/Mac OS, Windows uses the backslash (\\\\) to navigate the files that cause the conflict with Python. The problem with using the backslash is that in Python, the \\'\\\\\\' has a purpose known as an escape sequence. Escape sequences allow us to include special characters in strings, for example, \"\\\\n\" to add a new line or \"\\\\t\" to add spaces, etc. To avoid the issue we just need to add \"r\" before the file path and Python will treat it as a literal string (not an escape sequence).\\nHere’s how we should be loading the file instead:\\ndf = pd.read_csv(r\\'C:\\\\Users\\\\username\\\\Downloads\\\\data.csv\\')\\n(Muhammad Awon)',\n",
" 'document': 'be760b92',\n",
" 'question': 'Can you show an example of reading a file with Pandas in Windows?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To fix a \\'403 Forbidden\\' error when pushing to GitHub, you should run the following command:\\n\\n```bash\\ngit config -l | grep url\\n```\\n\\nEnsure that the output looks like this:\\n\\n```\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\n```\\n\\nIf it doesn\\'t, change it to the following format:\\n\\n```bash\\ngit remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n```',\n",
" 'answer_orig': 'Type the following command:\\ngit config -l | grep url\\nThe output should look like this:\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\nChange this to the following format and make sure the change is reflected using command in step 1:\\ngit remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n(Added by Dheeraj Karra)',\n",
" 'document': 'a2cfa1c9',\n",
" 'question': \"What command should I run to fix a '403 Forbidden' error when pushing to GitHub?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To check the current URL configuration for your Git repository, you can use the following command:\\n\\n```bash\\ngit config -l | grep url\\n```\\n\\nThe output will show you the URL, formatted like this:\\n\\n```\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\n```',\n",
" 'answer_orig': 'Type the following command:\\ngit config -l | grep url\\nThe output should look like this:\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\nChange this to the following format and make sure the change is reflected using command in step 1:\\ngit remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n(Added by Dheeraj Karra)',\n",
" 'document': 'a2cfa1c9',\n",
" 'question': 'How can I check the current URL configuration for my Git repository?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Before making changes to fix a GitHub push error, the `remote.origin.url` should look like this:\\n\\n```\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\n```\\n\\nYou should then change it to reflect the following format:\\n\\n```\\ngit remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n```',\n",
" 'answer_orig': 'Type the following command:\\ngit config -l | grep url\\nThe output should look like this:\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\nChange this to the following format and make sure the change is reflected using command in step 1:\\ngit remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n(Added by Dheeraj Karra)',\n",
" 'document': 'a2cfa1c9',\n",
" 'question': 'What should the remote.origin.url look like before making changes to fix a GitHub push error?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To change the remote URL and fix a \\'403 Forbidden\\' error when pushing to GitHub, you can follow these steps:\\n\\n1. First, check the current remote URL by typing the following command in your terminal:\\n ```\\n git config -l | grep url\\n ```\\n The output should look like this:\\n ```\\n remote.origin.url=https://github.com/github-username/github-repository-name.git\\n ```\\n\\n2. Change the remote URL to the following format using the command below:\\n ```\\n git remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n ```\\n\\nEnsure that the changes are reflected by running the command in step 1 again.',\n",
" 'answer_orig': 'Type the following command:\\ngit config -l | grep url\\nThe output should look like this:\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\nChange this to the following format and make sure the change is reflected using command in step 1:\\ngit remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n(Added by Dheeraj Karra)',\n",
" 'document': 'a2cfa1c9',\n",
" 'question': \"How do I change the remote URL to fix a '403 Forbidden' error when pushing to GitHub?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To ensure the changes to the Git URL are correctly applied after updating, you should verify the configuration by using the following command:\\n\\n```\\ngit config -l | grep url\\n```\\n\\nThe output should show the updated URL in the format:\\n\\n```\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\n```\\n\\nMake sure that this reflects the changes you made with the command:\\n\\n```\\ngit remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n```',\n",
" 'answer_orig': 'Type the following command:\\ngit config -l | grep url\\nThe output should look like this:\\nremote.origin.url=https://github.com/github-username/github-repository-name.git\\nChange this to the following format and make sure the change is reflected using command in step 1:\\ngit remote set-url origin \"https://[email protected]/github-username/github-repository-name.git\"\\n(Added by Dheeraj Karra)',\n",
" 'document': 'a2cfa1c9',\n",
" 'question': 'After updating the Git URL, how can I ensure the changes are correctly applied?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"You are encountering the 'fatal: Authentication failed for https://github.com/username' error because support for password authentication was removed on August 13, 2021. To resolve this issue, you need to create a personal access token from your GitHub account and use it when making a push of your changes. For more information on currently recommended modes of authentication, you can refer to the GitHub documentation at: https://docs.github.com/en/get-started/getting-started-with-git/about-remote-repositories#cloning-with-https-urls.\",\n",
" 'answer_orig': \"I had a problem when I tried to push my code from Git Bash:\\nremote: Support for password authentication was removed on August 13, 2021.\\nremote: Please see https://docs.github.com/en/get-started/getting-started-with-git/about-remote-repositories#cloning-with-https-urls for information on currently recommended modes of authentication.\\nfatal: Authentication failed for 'https://github.com/username\\nSolution:\\nCreate a personal access token from your github account and use it when you make a push of your last changes.\\nhttps://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent\\nBruno Bedón\",\n",
" 'document': '7b907071',\n",
" 'question': \"Why am I encountering 'fatal: Authentication failed for https://github.com/username' error when pushing code?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'If password authentication for GitHub is no longer supported, you should create a personal access token from your GitHub account and use it when you push your code. For detailed information about currently recommended modes of authentication, you can visit this link: [GitHub Docs](https://docs.github.com/en/get-started/getting-started-with-git/about-remote-repositories#cloning-with-https-urls).',\n",
" 'answer_orig': \"I had a problem when I tried to push my code from Git Bash:\\nremote: Support for password authentication was removed on August 13, 2021.\\nremote: Please see https://docs.github.com/en/get-started/getting-started-with-git/about-remote-repositories#cloning-with-https-urls for information on currently recommended modes of authentication.\\nfatal: Authentication failed for 'https://github.com/username\\nSolution:\\nCreate a personal access token from your github account and use it when you make a push of your last changes.\\nhttps://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent\\nBruno Bedón\",\n",
" 'document': '7b907071',\n",
" 'question': 'What should I do if password authentication for GitHub is no longer supported?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find information on currently recommended modes of authentication for GitHub after password support was removed by visiting the following link: [GitHub Authentication Documentation](https://docs.github.com/en/get-started/getting-started-with-git/about-remote-repositories#cloning-with-https-urls).',\n",
" 'answer_orig': \"I had a problem when I tried to push my code from Git Bash:\\nremote: Support for password authentication was removed on August 13, 2021.\\nremote: Please see https://docs.github.com/en/get-started/getting-started-with-git/about-remote-repositories#cloning-with-https-urls for information on currently recommended modes of authentication.\\nfatal: Authentication failed for 'https://github.com/username\\nSolution:\\nCreate a personal access token from your github account and use it when you make a push of your last changes.\\nhttps://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent\\nBruno Bedón\",\n",
" 'document': '7b907071',\n",
" 'question': 'Where can I find information on recommended authentications for GitHub after password support was removed?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To configure your GitHub authentication if you can't use a password, you need to create a personal access token from your GitHub account and use that token when pushing changes. Additionally, you can set up SSH keys for authentication by following the instructions provided in the GitHub documentation on generating a new SSH key and adding it to the SSH agent. This way, you can authenticate with GitHub without needing to use a password.\",\n",
" 'answer_orig': \"I had a problem when I tried to push my code from Git Bash:\\nremote: Support for password authentication was removed on August 13, 2021.\\nremote: Please see https://docs.github.com/en/get-started/getting-started-with-git/about-remote-repositories#cloning-with-https-urls for information on currently recommended modes of authentication.\\nfatal: Authentication failed for 'https://github.com/username\\nSolution:\\nCreate a personal access token from your github account and use it when you make a push of your last changes.\\nhttps://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent\\nBruno Bedón\",\n",
" 'document': '7b907071',\n",
" 'question': \"How can I configure my GitHub authentication if I can't use a password?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To create a personal access token for GitHub authentication, you can follow the guide provided by GitHub. Specifically, you'll need to create a personal access token from your GitHub account and use it when pushing your changes. For detailed instructions, refer to the GitHub documentation at [this link](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent).\",\n",
" 'answer_orig': \"I had a problem when I tried to push my code from Git Bash:\\nremote: Support for password authentication was removed on August 13, 2021.\\nremote: Please see https://docs.github.com/en/get-started/getting-started-with-git/about-remote-repositories#cloning-with-https-urls for information on currently recommended modes of authentication.\\nfatal: Authentication failed for 'https://github.com/username\\nSolution:\\nCreate a personal access token from your github account and use it when you make a push of your last changes.\\nhttps://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent\\nBruno Bedón\",\n",
" 'document': '7b907071',\n",
" 'question': 'Where do I create a personal access token for GitHub authentication?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To resolve the \"wget: unable to resolve host address\" error in Kaggle, you should turn on the Internet for your session in the Kaggle notebook settings. You can find this option on the settings panel, located on the right-hand side of the Kaggle screen. Additionally, you may need to verify your phone number to confirm that you are not a bot.',\n",
" 'answer_orig': \"In Kaggle, when you are trying to !wget a dataset from github (or any other public repository/location), you get the following error:\\nGetting this error while trying to import data- !wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n--2022-09-17 16:55:24-- https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Temporary failure in name resolution.\\nwget: unable to resolve host address 'raw.githubusercontent.com'\\nSolution:\\nIn your Kaggle notebook settings, turn on the Internet for your session. It's on the settings panel, on the right hand side of the Kaggle screen. You'll be asked to verify your phone number so Kaggle knows you are not a bot.\",\n",
" 'document': 'fc2e0a61',\n",
" 'question': 'How can I resolve the wget: unable to resolve host address error in Kaggle?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'When wget fails to resolve a host address in Kaggle, you should turn on the Internet for your session in your Kaggle notebook settings. This option is found on the settings panel on the right-hand side of the Kaggle screen. You may be required to verify your phone number to confirm that you are not a bot.',\n",
" 'answer_orig': \"In Kaggle, when you are trying to !wget a dataset from github (or any other public repository/location), you get the following error:\\nGetting this error while trying to import data- !wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n--2022-09-17 16:55:24-- https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Temporary failure in name resolution.\\nwget: unable to resolve host address 'raw.githubusercontent.com'\\nSolution:\\nIn your Kaggle notebook settings, turn on the Internet for your session. It's on the settings panel, on the right hand side of the Kaggle screen. You'll be asked to verify your phone number so Kaggle knows you are not a bot.\",\n",
" 'document': 'fc2e0a61',\n",
" 'question': 'What should I do when wget fails to resolve a host address in Kaggle?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To import data with `wget` in Kaggle without encountering errors, make sure to enable the Internet for your session. You can do this by accessing the settings panel on the right-hand side of the Kaggle screen. After enabling the Internet, you can use the command:\\n\\n```bash\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n```\\n\\nIf you still face issues with resolving the host address, double-check that the Internet is indeed enabled for your session.',\n",
" 'answer_orig': \"In Kaggle, when you are trying to !wget a dataset from github (or any other public repository/location), you get the following error:\\nGetting this error while trying to import data- !wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n--2022-09-17 16:55:24-- https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Temporary failure in name resolution.\\nwget: unable to resolve host address 'raw.githubusercontent.com'\\nSolution:\\nIn your Kaggle notebook settings, turn on the Internet for your session. It's on the settings panel, on the right hand side of the Kaggle screen. You'll be asked to verify your phone number so Kaggle knows you are not a bot.\",\n",
" 'document': 'fc2e0a61',\n",
" 'question': 'How to import data with wget in Kaggle without encountering errors?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To fix wget errors in Kaggle, you need to adjust the settings in your Kaggle notebook by turning on the Internet for your session. This option is available on the settings panel on the right-hand side of the Kaggle screen. Please note that you will be asked to verify your phone number to confirm you are not a bot.',\n",
" 'answer_orig': \"In Kaggle, when you are trying to !wget a dataset from github (or any other public repository/location), you get the following error:\\nGetting this error while trying to import data- !wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n--2022-09-17 16:55:24-- https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Temporary failure in name resolution.\\nwget: unable to resolve host address 'raw.githubusercontent.com'\\nSolution:\\nIn your Kaggle notebook settings, turn on the Internet for your session. It's on the settings panel, on the right hand side of the Kaggle screen. You'll be asked to verify your phone number so Kaggle knows you are not a bot.\",\n",
" 'document': 'fc2e0a61',\n",
" 'question': 'What settings need to be adjusted in Kaggle to fix wget errors?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You need to verify your phone number to use wget in Kaggle because turning on the Internet for your session requires this step. Verifying your phone number helps Kaggle ensure that you are not a bot.',\n",
" 'answer_orig': \"In Kaggle, when you are trying to !wget a dataset from github (or any other public repository/location), you get the following error:\\nGetting this error while trying to import data- !wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n--2022-09-17 16:55:24-- https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Temporary failure in name resolution.\\nwget: unable to resolve host address 'raw.githubusercontent.com'\\nSolution:\\nIn your Kaggle notebook settings, turn on the Internet for your session. It's on the settings panel, on the right hand side of the Kaggle screen. You'll be asked to verify your phone number so Kaggle knows you are not a bot.\",\n",
" 'document': 'fc2e0a61',\n",
" 'question': 'Why do I need to verify my phone number to use wget in Kaggle?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You should watch the video titled \"Creating Virtual Environment for Python from VS Code\" to set up a virtual environment for Python in VS Code.',\n",
" 'answer_orig': 'I found this video quite helpful: Creating Virtual Environment for Python from VS Code\\n[Native Jupiter Notebooks support in VS Code] In VS Code you can also have a native Jupiter Notebooks support, i.e. you do not need to open a web browser to code in a Notebook. If you have port forwarding enabled + run a ‘jupyter notebook ‘ command from a remote machine + have a remote connection configured in .ssh/config (as Alexey’s video suggests) - VS Code can execute remote Jupyter Notebooks files on a remote server from your local machine: https://code.visualstudio.com/docs/datascience/jupyter-notebooks.\\n[Git support from VS Code] You can work with Github from VSCode - staging and commits are easy from the VS Code’s UI: https://code.visualstudio.com/docs/sourcecontrol/overview\\n(Added by Ivan Brigida)',\n",
" 'document': 'd43e5742',\n",
" 'question': 'What video should I watch to set up a virtual environment for Python in VS Code?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, it is possible to use Jupyter Notebooks in VS Code without a web browser. VS Code has native support for Jupyter Notebooks, allowing you to code in a Notebook directly within the VS Code environment.',\n",
" 'answer_orig': 'I found this video quite helpful: Creating Virtual Environment for Python from VS Code\\n[Native Jupiter Notebooks support in VS Code] In VS Code you can also have a native Jupiter Notebooks support, i.e. you do not need to open a web browser to code in a Notebook. If you have port forwarding enabled + run a ‘jupyter notebook ‘ command from a remote machine + have a remote connection configured in .ssh/config (as Alexey’s video suggests) - VS Code can execute remote Jupyter Notebooks files on a remote server from your local machine: https://code.visualstudio.com/docs/datascience/jupyter-notebooks.\\n[Git support from VS Code] You can work with Github from VSCode - staging and commits are easy from the VS Code’s UI: https://code.visualstudio.com/docs/sourcecontrol/overview\\n(Added by Ivan Brigida)',\n",
" 'document': 'd43e5742',\n",
" 'question': 'Is it possible to use Jupyter Notebooks in VS Code without a web browser?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To execute remote Jupyter Notebook files from your local machine using VS Code, you need to ensure that you have port forwarding enabled and run the `jupyter notebook` command from the remote machine. Additionally, you must have a remote connection configured in your `.ssh/config` file. Once set up, you can use VS Code's native Jupyter Notebooks support to code in the Notebook without needing to open a web browser. More details can be found in the VS Code Jupyter documentation: https://code.visualstudio.com/docs/datascience/jupyter-notebooks.\",\n",
" 'answer_orig': 'I found this video quite helpful: Creating Virtual Environment for Python from VS Code\\n[Native Jupiter Notebooks support in VS Code] In VS Code you can also have a native Jupiter Notebooks support, i.e. you do not need to open a web browser to code in a Notebook. If you have port forwarding enabled + run a ‘jupyter notebook ‘ command from a remote machine + have a remote connection configured in .ssh/config (as Alexey’s video suggests) - VS Code can execute remote Jupyter Notebooks files on a remote server from your local machine: https://code.visualstudio.com/docs/datascience/jupyter-notebooks.\\n[Git support from VS Code] You can work with Github from VSCode - staging and commits are easy from the VS Code’s UI: https://code.visualstudio.com/docs/sourcecontrol/overview\\n(Added by Ivan Brigida)',\n",
" 'document': 'd43e5742',\n",
" 'question': 'How can I execute remote Jupyter Notebooks files from my local machine using VS Code?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find more information about working with GitHub from VS Code at the following link: https://code.visualstudio.com/docs/sourcecontrol/overview.',\n",
" 'answer_orig': 'I found this video quite helpful: Creating Virtual Environment for Python from VS Code\\n[Native Jupiter Notebooks support in VS Code] In VS Code you can also have a native Jupiter Notebooks support, i.e. you do not need to open a web browser to code in a Notebook. If you have port forwarding enabled + run a ‘jupyter notebook ‘ command from a remote machine + have a remote connection configured in .ssh/config (as Alexey’s video suggests) - VS Code can execute remote Jupyter Notebooks files on a remote server from your local machine: https://code.visualstudio.com/docs/datascience/jupyter-notebooks.\\n[Git support from VS Code] You can work with Github from VSCode - staging and commits are easy from the VS Code’s UI: https://code.visualstudio.com/docs/sourcecontrol/overview\\n(Added by Ivan Brigida)',\n",
" 'document': 'd43e5742',\n",
" 'question': 'Where can I find more information about working with GitHub from VS Code?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To run Jupyter Notebooks remotely using VS Code, you need to ensure the following setup:\\n\\n1. **Port Forwarding**: Make sure that port forwarding is enabled.\\n2. **Jupyter Notebook Command**: Run the command `jupyter notebook` from a remote machine.\\n3. **Remote Connection Configuration**: Configure a remote connection in your `.ssh/config` file.\\n\\nFollowing these steps will allow VS Code to execute remote Jupyter Notebook files from your local machine. For further assistance, you can refer to the official VS Code documentation on Jupyter Notebooks: https://code.visualstudio.com/docs/datascience/jupyter-notebooks.',\n",
" 'answer_orig': 'I found this video quite helpful: Creating Virtual Environment for Python from VS Code\\n[Native Jupiter Notebooks support in VS Code] In VS Code you can also have a native Jupiter Notebooks support, i.e. you do not need to open a web browser to code in a Notebook. If you have port forwarding enabled + run a ‘jupyter notebook ‘ command from a remote machine + have a remote connection configured in .ssh/config (as Alexey’s video suggests) - VS Code can execute remote Jupyter Notebooks files on a remote server from your local machine: https://code.visualstudio.com/docs/datascience/jupyter-notebooks.\\n[Git support from VS Code] You can work with Github from VSCode - staging and commits are easy from the VS Code’s UI: https://code.visualstudio.com/docs/sourcecontrol/overview\\n(Added by Ivan Brigida)',\n",
" 'document': 'd43e5742',\n",
" 'question': 'What setup is necessary to run Jupyter Notebooks remotely using VS Code?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'No, you do not need to run \"conda create -n ....\" every time you start VS Code to work on the project. This command is only necessary the first time to create the environment. After that, you only need to run \"conda activate ml-zoomcamp\" to activate the environment whenever you want to use it.',\n",
" 'answer_orig': 'With regards to creating an environment for the project, do we need to run the command \"conda create -n .......\" and \"conda activate ml-zoomcamp\" everytime we open vs code to work on the project?\\nAnswer:\\n\"conda create -n ....\" is just run the first time to create the environment. Once created, you just need to run \"conda activate ml-zoomcamp\" whenever you want to use it.\\n(Added by Wesley Barreto)\\nconda env export > environment.yml will also allow you to reproduce your existing environment in a YAML file. You can then recreate it with conda env create -f environment.yml',\n",
" 'document': '32bc0538',\n",
" 'question': \"Is it necessary to run 'conda create -n ...' every time I start VS Code to work on the project?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'After creating the environment, you should use the command \"conda activate ml-zoomcamp\" to activate it in VS Code.',\n",
" 'answer_orig': 'With regards to creating an environment for the project, do we need to run the command \"conda create -n .......\" and \"conda activate ml-zoomcamp\" everytime we open vs code to work on the project?\\nAnswer:\\n\"conda create -n ....\" is just run the first time to create the environment. Once created, you just need to run \"conda activate ml-zoomcamp\" whenever you want to use it.\\n(Added by Wesley Barreto)\\nconda env export > environment.yml will also allow you to reproduce your existing environment in a YAML file. You can then recreate it with conda env create -f environment.yml',\n",
" 'document': '32bc0538',\n",
" 'question': 'What command should I use to activate the environment in VS Code after creating it?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To save your current Conda environment to a file, you can use the command:\\n\\n```\\nconda env export > environment.yml\\n```\\n\\nThis will create a YAML file named `environment.yml` that contains all the dependencies of your existing environment. You can then recreate the environment using the following command:\\n\\n```\\nconda env create -f environment.yml\\n```',\n",
" 'answer_orig': 'With regards to creating an environment for the project, do we need to run the command \"conda create -n .......\" and \"conda activate ml-zoomcamp\" everytime we open vs code to work on the project?\\nAnswer:\\n\"conda create -n ....\" is just run the first time to create the environment. Once created, you just need to run \"conda activate ml-zoomcamp\" whenever you want to use it.\\n(Added by Wesley Barreto)\\nconda env export > environment.yml will also allow you to reproduce your existing environment in a YAML file. You can then recreate it with conda env create -f environment.yml',\n",
" 'document': '32bc0538',\n",
" 'question': 'How can I save my current Conda environment to a file?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The command that allows you to recreate an environment from a saved YAML file is `conda env create -f environment.yml`.',\n",
" 'answer_orig': 'With regards to creating an environment for the project, do we need to run the command \"conda create -n .......\" and \"conda activate ml-zoomcamp\" everytime we open vs code to work on the project?\\nAnswer:\\n\"conda create -n ....\" is just run the first time to create the environment. Once created, you just need to run \"conda activate ml-zoomcamp\" whenever you want to use it.\\n(Added by Wesley Barreto)\\nconda env export > environment.yml will also allow you to reproduce your existing environment in a YAML file. You can then recreate it with conda env create -f environment.yml',\n",
" 'document': '32bc0538',\n",
" 'question': 'What command allows me to recreate an environment from a saved YAML file?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'After the Conda environment is created, you need to execute the command `conda activate ml-zoomcamp` to use it.',\n",
" 'answer_orig': 'With regards to creating an environment for the project, do we need to run the command \"conda create -n .......\" and \"conda activate ml-zoomcamp\" everytime we open vs code to work on the project?\\nAnswer:\\n\"conda create -n ....\" is just run the first time to create the environment. Once created, you just need to run \"conda activate ml-zoomcamp\" whenever you want to use it.\\n(Added by Wesley Barreto)\\nconda env export > environment.yml will also allow you to reproduce your existing environment in a YAML file. You can then recreate it with conda env create -f environment.yml',\n",
" 'document': '32bc0538',\n",
" 'question': 'After the Conda environment is created, which command is executed to use it?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'When you invert a matrix and multiply it by the original matrix, you would typically expect to obtain an identity matrix. However, due to floating point precision issues in computer arithmetic, you may not get an exact identity matrix. Instead, you might see results like small numerical errors, for instance:\\n\\n```\\n[[ 1.00000000e+00 -1.38777878e-16]\\n [ 3.16968674e-13 1.00000000e+00]]\\n```\\n\\nThis discrepancy occurs because floating point math can introduce small errors when performing calculations, leading to results that are very close to but not exactly equal to the expected identity matrix.',\n",
" 'answer_orig': \"I was doing Question 7 from Week1 Homework and with step6: Invert XTX, I created the inverse. Now, an inverse when multiplied by the original matrix should return in an Identity matrix. But when I multiplied the inverse with the original matrix, it gave a matrix like this:\\nInverse * Original:\\n[[ 1.00000000e+00 -1.38777878e-16]\\n[ 3.16968674e-13 1.00000000e+00]]\\nSolution:\\nIt's because floating point math doesn't work well on computers as shown here: https://stackoverflow.com/questions/588004/is-floating-point-math-broken\\n(Added by Wesley Barreto)\",\n",
" 'document': 'b6730228',\n",
" 'question': \"If I invert a matrix and multiply by the original, why don't I get an exact identity matrix?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The matrices do not return an exact identity after multiplication due to floating point precision issues in computer calculations. Floating point math can lead to small errors, which means that when the inverse of a matrix is multiplied by the original matrix, the result may be very close to the identity matrix but not exactly equal to it. This can result in values like -1.38777878e-16 or 3.16968674e-13 appearing instead of perfect zeros.',\n",
" 'answer_orig': \"I was doing Question 7 from Week1 Homework and with step6: Invert XTX, I created the inverse. Now, an inverse when multiplied by the original matrix should return in an Identity matrix. But when I multiplied the inverse with the original matrix, it gave a matrix like this:\\nInverse * Original:\\n[[ 1.00000000e+00 -1.38777878e-16]\\n[ 3.16968674e-13 1.00000000e+00]]\\nSolution:\\nIt's because floating point math doesn't work well on computers as shown here: https://stackoverflow.com/questions/588004/is-floating-point-math-broken\\n(Added by Wesley Barreto)\",\n",
" 'document': 'b6730228',\n",
" 'question': 'Why do the matrices not return an exact identity after multiplication?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The imprecise results after matrix inversion can be attributed to the limitations of floating point arithmetic in computers. When you invert a matrix and multiply it by the original matrix, ideally, the result should be an identity matrix. However, due to floating point precision issues, you may encounter very small values close to zero that affect the outcome, resulting in a matrix that resembles an identity matrix but includes tiny discrepancies, such as:\\n\\n```\\n[[ 1.00000000e+00 -1.38777878e-16]\\n [ 3.16968674e-13 1.00000000e+00]]\\n```\\n\\nThese discrepancies arise because floating point math may not produce exact results as expected, which is a known issue in computational mathematics.',\n",
" 'answer_orig': \"I was doing Question 7 from Week1 Homework and with step6: Invert XTX, I created the inverse. Now, an inverse when multiplied by the original matrix should return in an Identity matrix. But when I multiplied the inverse with the original matrix, it gave a matrix like this:\\nInverse * Original:\\n[[ 1.00000000e+00 -1.38777878e-16]\\n[ 3.16968674e-13 1.00000000e+00]]\\nSolution:\\nIt's because floating point math doesn't work well on computers as shown here: https://stackoverflow.com/questions/588004/is-floating-point-math-broken\\n(Added by Wesley Barreto)\",\n",
" 'document': 'b6730228',\n",
" 'question': 'Can you explain the imprecise results after matrix inversion?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The slight errors observed when multiplying an inverted matrix by the original matrix are primarily due to floating point precision issues in computer arithmetic. When performing calculations with floating point numbers, the results may not be exact, leading to very small discrepancies, as illustrated in the example given in the FAQ. This is a common occurrence when dealing with matrix operations in programming and can be attributed to the inherent limitations of floating point representation.',\n",
" 'answer_orig': \"I was doing Question 7 from Week1 Homework and with step6: Invert XTX, I created the inverse. Now, an inverse when multiplied by the original matrix should return in an Identity matrix. But when I multiplied the inverse with the original matrix, it gave a matrix like this:\\nInverse * Original:\\n[[ 1.00000000e+00 -1.38777878e-16]\\n[ 3.16968674e-13 1.00000000e+00]]\\nSolution:\\nIt's because floating point math doesn't work well on computers as shown here: https://stackoverflow.com/questions/588004/is-floating-point-math-broken\\n(Added by Wesley Barreto)\",\n",
" 'document': 'b6730228',\n",
" 'question': 'What causes the slight errors when multiplying an inverted matrix by the original?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Floating-point math affects the accuracy of matrix operations because it doesn't work well on computers, leading to small numerical errors. For example, when calculating the inverse of a matrix and multiplying it by the original matrix, the expected result should ideally be an identity matrix. However, due to the limitations of floating-point arithmetic, the result may deviate slightly, as shown in the example where the product yielded values very close to 1 but not exactly, indicating precision issues inherent in floating-point computations.\",\n",
" 'answer_orig': \"I was doing Question 7 from Week1 Homework and with step6: Invert XTX, I created the inverse. Now, an inverse when multiplied by the original matrix should return in an Identity matrix. But when I multiplied the inverse with the original matrix, it gave a matrix like this:\\nInverse * Original:\\n[[ 1.00000000e+00 -1.38777878e-16]\\n[ 3.16968674e-13 1.00000000e+00]]\\nSolution:\\nIt's because floating point math doesn't work well on computers as shown here: https://stackoverflow.com/questions/588004/is-floating-point-math-broken\\n(Added by Wesley Barreto)\",\n",
" 'document': 'b6730228',\n",
" 'question': 'Why does floating-point math affect the accuracy of matrix operations?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The command that helps print dataset information in pandas is `df.info()`. This function prints information about the dataset, such as the index datatype, number of entries, column information with not-null count and datatype, and memory usage by the dataset.',\n",
" 'answer_orig': 'Answer:\\nIt prints the information about the dataset like:\\nIndex datatype\\nNo. of entries\\nColumn information with not-null count and datatype\\nMemory usage by dataset\\nWe use it as:\\ndf.info()\\n(Added by Aadarsha Shrestha & Emoghena Itakpe)',\n",
" 'document': '3ce9bbb8',\n",
" 'question': 'What command helps print dataset information in pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To check the column info and memory usage of a pandas DataFrame, you can use the `info()` method. It prints details about the dataset, including the index datatype, number of entries, column information with not-null count and datatype, and memory usage by the dataset. You can use it as follows:\\n\\n```python\\ndf.info()\\n```',\n",
" 'answer_orig': 'Answer:\\nIt prints the information about the dataset like:\\nIndex datatype\\nNo. of entries\\nColumn information with not-null count and datatype\\nMemory usage by dataset\\nWe use it as:\\ndf.info()\\n(Added by Aadarsha Shrestha & Emoghena Itakpe)',\n",
" 'document': '3ce9bbb8',\n",
" 'question': 'How to check column info and memory usage of a pandas DataFrame?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To view the index datatype and the number of entries in a dataset, you should use the method `pandas.DataFrame.info()`. You can do this by calling it as follows:\\n\\n```python\\ndf.info()\\n``` \\n\\nThis will print the information about the dataset, including the index datatype and the number of entries.',\n",
" 'answer_orig': 'Answer:\\nIt prints the information about the dataset like:\\nIndex datatype\\nNo. of entries\\nColumn information with not-null count and datatype\\nMemory usage by dataset\\nWe use it as:\\ndf.info()\\n(Added by Aadarsha Shrestha & Emoghena Itakpe)',\n",
" 'document': '3ce9bbb8',\n",
" 'question': 'What method should I use to view index datatype and the number of entries?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The pandas function that prints column data type and not-null count is `pandas.DataFrame.info()`.',\n",
" 'answer_orig': 'Answer:\\nIt prints the information about the dataset like:\\nIndex datatype\\nNo. of entries\\nColumn information with not-null count and datatype\\nMemory usage by dataset\\nWe use it as:\\ndf.info()\\n(Added by Aadarsha Shrestha & Emoghena Itakpe)',\n",
" 'document': '3ce9bbb8',\n",
" 'question': 'Which pandas function prints column data type and not-null count?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To obtain general dataset info in pandas, you can use the `pandas.DataFrame.info()` method. This method prints information about the dataset, including the index datatype, number of entries, column information with not-null count and datatype, and memory usage by the dataset. You can call this method on your dataframe like this:\\n\\n```python\\ndf.info()\\n```',\n",
" 'answer_orig': 'Answer:\\nIt prints the information about the dataset like:\\nIndex datatype\\nNo. of entries\\nColumn information with not-null count and datatype\\nMemory usage by dataset\\nWe use it as:\\ndf.info()\\n(Added by Aadarsha Shrestha & Emoghena Itakpe)',\n",
" 'document': '3ce9bbb8',\n",
" 'question': 'How can I obtain general dataset info in pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"If you get a NameError for 'np' in your code, it likely means that the numpy library has not been imported. To resolve this, you should ensure that you include the following import statement at the beginning of your code:\\n\\n```python\\nimport numpy as np\\n```\\n\\nIt's also advisable to import the pandas library if you are using it, with the following line as well:\\n\\n```python\\nimport pandas as pd\\n``` \\n\\nMake sure these import statements are included before any other code that uses 'np' or 'pd'.\",\n",
" 'answer_orig': \"Pandas and numpy libraries are not being imported\\nNameError: name 'np' is not defined\\nNameError: name 'pd' is not defined\\nIf you're using numpy or pandas, make sure you use the first few lines before anything else.\\nimport pandas as pd\\nimport numpy as np\\nAdded by Manuel Alejandro Aponte\",\n",
" 'document': '4e584d06',\n",
" 'question': \"What should I do if I get a NameError for 'np' in my code?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To fix a NameError for 'pd' when using pandas, make sure you have imported the pandas library at the beginning of your script. You should include the following line of code before any usage of 'pd':\\n\\n```python\\nimport pandas as pd\\n```\\nIf you are also using numpy, you should import it as well:\\n\\n```python\\nimport numpy as np\\n``` \\n\\nThis ensures that both libraries are properly imported and available for use in your code.\",\n",
" 'answer_orig': \"Pandas and numpy libraries are not being imported\\nNameError: name 'np' is not defined\\nNameError: name 'pd' is not defined\\nIf you're using numpy or pandas, make sure you use the first few lines before anything else.\\nimport pandas as pd\\nimport numpy as np\\nAdded by Manuel Alejandro Aponte\",\n",
" 'document': '4e584d06',\n",
" 'question': \"How can I fix a NameError for 'pd' when using pandas?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"If you receive a NameError for 'np', it means that the numpy library has not been imported.\",\n",
" 'answer_orig': \"Pandas and numpy libraries are not being imported\\nNameError: name 'np' is not defined\\nNameError: name 'pd' is not defined\\nIf you're using numpy or pandas, make sure you use the first few lines before anything else.\\nimport pandas as pd\\nimport numpy as np\\nAdded by Manuel Alejandro Aponte\",\n",
" 'document': '4e584d06',\n",
" 'question': \"What libraries might not be imported if I receive a NameError for 'np'?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"You are seeing a NameError for 'pd' in your machine learning script because the pandas library has not been imported. To resolve this issue, make sure to include the following line at the beginning of your script:\\n\\n```python\\nimport pandas as pd\\n```\\n\\nThis will properly define 'pd' so that you can use it in your code.\",\n",
" 'answer_orig': \"Pandas and numpy libraries are not being imported\\nNameError: name 'np' is not defined\\nNameError: name 'pd' is not defined\\nIf you're using numpy or pandas, make sure you use the first few lines before anything else.\\nimport pandas as pd\\nimport numpy as np\\nAdded by Manuel Alejandro Aponte\",\n",
" 'document': '4e584d06',\n",
" 'question': \"Why am I seeing a NameError for 'pd' in my machine learning script?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The solution to the NameError issue in the FAQ section was added by Manuel Alejandro Aponte.',\n",
" 'answer_orig': \"Pandas and numpy libraries are not being imported\\nNameError: name 'np' is not defined\\nNameError: name 'pd' is not defined\\nIf you're using numpy or pandas, make sure you use the first few lines before anything else.\\nimport pandas as pd\\nimport numpy as np\\nAdded by Manuel Alejandro Aponte\",\n",
" 'document': '4e584d06',\n",
" 'question': 'Who added the solution to the NameError issue in the FAQ section?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To select columns by data type in a pandas dataframe, you can use the `select_dtypes` method. For example, to get all columns with numeric data types, you can use:\\n\\n```python\\ndf.select_dtypes(include=np.number).columns.tolist()\\n```\\n\\nTo select columns with object data types, you can use:\\n\\n```python\\ndf.select_dtypes(include='object').columns.tolist()\\n```\",\n",
" 'answer_orig': \"What if there were hundreds of columns? How do you get the columns only with numeric or object data in a more concise way?\\ndf.select_dtypes(include=np.number).columns.tolist()\\ndf.select_dtypes(include='object').columns.tolist()\\nAdded by Gregory Morris\",\n",
" 'document': 'ff4da2b6',\n",
" 'question': 'How can I select columns by data type in pandas dataframe?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'A concise method to get numeric columns in a dataframe is to use the following code:\\n\\n```python\\ndf.select_dtypes(include=np.number).columns.tolist()\\n```',\n",
" 'answer_orig': \"What if there were hundreds of columns? How do you get the columns only with numeric or object data in a more concise way?\\ndf.select_dtypes(include=np.number).columns.tolist()\\ndf.select_dtypes(include='object').columns.tolist()\\nAdded by Gregory Morris\",\n",
" 'document': 'ff4da2b6',\n",
" 'question': 'What is a concise method to get numeric columns in a dataframe?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To list columns with object data type in a dataframe, you can use the following code:\\n\\n```python\\ndf.select_dtypes(include='object').columns.tolist()\\n```\",\n",
" 'answer_orig': \"What if there were hundreds of columns? How do you get the columns only with numeric or object data in a more concise way?\\ndf.select_dtypes(include=np.number).columns.tolist()\\ndf.select_dtypes(include='object').columns.tolist()\\nAdded by Gregory Morris\",\n",
" 'document': 'ff4da2b6',\n",
" 'question': 'How do I list columns with object data type in a dataframe?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"The pandas function to filter columns by dtype is `df.select_dtypes(include=np.number).columns.tolist()` for numeric data and `df.select_dtypes(include='object').columns.tolist()` for object data.\",\n",
" 'answer_orig': \"What if there were hundreds of columns? How do you get the columns only with numeric or object data in a more concise way?\\ndf.select_dtypes(include=np.number).columns.tolist()\\ndf.select_dtypes(include='object').columns.tolist()\\nAdded by Gregory Morris\",\n",
" 'document': 'ff4da2b6',\n",
" 'question': \"What's the pandas function to filter columns by dtype?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The solution for selecting columns by dtype was contributed by Gregory Morris.',\n",
" 'answer_orig': \"What if there were hundreds of columns? How do you get the columns only with numeric or object data in a more concise way?\\ndf.select_dtypes(include=np.number).columns.tolist()\\ndf.select_dtypes(include='object').columns.tolist()\\nAdded by Gregory Morris\",\n",
" 'document': 'ff4da2b6',\n",
" 'question': 'Who contributed the solution for selecting columns by dtype?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To determine the structure of a dataset in Pandas, you can use the `.shape` attribute. This attribute provides information about the number of rows and columns in the dataset. For example, you can identify the number of rows with `df.shape[0]` and the number of columns with `df.shape[1]`. Additionally, you can use `df.info()` to get a detailed summary of the dataset, which includes index datatype, number of entries, column information with not-null count and datatype, and memory usage.',\n",
" 'answer_orig': 'There are many ways to identify the shape of dataset, one of them is using .shape attribute!\\ndf.shape\\ndf.shape[0] # for identify the number of rows\\ndf.shape[1] # for identify the number of columns\\nAdded by Radikal Lukafiardi',\n",
" 'document': '58c1c168',\n",
" 'question': 'How can I determine the structure of a dataset in Pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The attribute in Pandas that helps identify the number of rows in a dataset is the `.shape` attribute. You can use `df.shape[0]` to get the number of rows.',\n",
" 'answer_orig': 'There are many ways to identify the shape of dataset, one of them is using .shape attribute!\\ndf.shape\\ndf.shape[0] # for identify the number of rows\\ndf.shape[1] # for identify the number of columns\\nAdded by Radikal Lukafiardi',\n",
" 'document': '58c1c168',\n",
" 'question': 'What attribute in Pandas helps identify the number of rows in a dataset?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You would use the `.shape` attribute to identify the number of columns in a Pandas DataFrame. Specifically, you can access it with `df.shape[1]` to get the number of columns.',\n",
" 'answer_orig': 'There are many ways to identify the shape of dataset, one of them is using .shape attribute!\\ndf.shape\\ndf.shape[0] # for identify the number of rows\\ndf.shape[1] # for identify the number of columns\\nAdded by Radikal Lukafiardi',\n",
" 'document': '58c1c168',\n",
" 'question': 'Which attribute would you use to identify the number of columns in a Pandas DataFrame?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The information about identifying the shape of a dataset was contributed by Radikal Lukafiardi.',\n",
" 'answer_orig': 'There are many ways to identify the shape of dataset, one of them is using .shape attribute!\\ndf.shape\\ndf.shape[0] # for identify the number of rows\\ndf.shape[1] # for identify the number of columns\\nAdded by Radikal Lukafiardi',\n",
" 'document': '58c1c168',\n",
" 'question': 'Who contributed the information about identifying the shape of a dataset?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'One method to find the shape of a dataset in Pandas is by using the `.shape` attribute. For example, you can use `df.shape` to get the overall shape, `df.shape[0]` to identify the number of rows, and `df.shape[1]` to identify the number of columns.',\n",
" 'answer_orig': 'There are many ways to identify the shape of dataset, one of them is using .shape attribute!\\ndf.shape\\ndf.shape[0] # for identify the number of rows\\ndf.shape[1] # for identify the number of columns\\nAdded by Radikal Lukafiardi',\n",
" 'document': '58c1c168',\n",
" 'question': 'What is one method to find the shape of a dataset in Pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To avoid value errors in your homework when performing matrix multiplication, you should use `np.dot()` or the `@` operator (or `np.matmul()`). It's essential to ensure that the number of columns in the first matrix matches the number of rows in the second matrix for the multiplication to be valid. Remember that the order of multiplication matters and can affect the result.\",\n",
" 'answer_orig': 'First of all use np.dot for matrix multiplication. When you compute matrix-matrix multiplication you should understand that order of multiplying is crucial and affects the result of the multiplication!\\nDimension Mismatch\\nTo perform matrix multiplication, the number of columns in the 1st matrix should match the number of rows in the 2nd matrix. You can rearrange the order to make sure that this satisfies the condition.\\nAdded by Leah Gotladera',\n",
" 'document': '96076a1a',\n",
" 'question': 'What function should I use for matrix multiplication to avoid value errors in our homework?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The order of matrices is important in matrix multiplication because changing the order changes the result, as they can yield matrices of different sizes and usually different values. Specifically, if you have two matrices A (m x n) and B (n x p), the product A * B results in a matrix C (m x p), while the product B * A results in a matrix D (n x n). Since C and D can be of different dimensions and values, the order of multiplication must be respected to achieve the correct result.',\n",
" 'answer_orig': 'First of all use np.dot for matrix multiplication. When you compute matrix-matrix multiplication you should understand that order of multiplying is crucial and affects the result of the multiplication!\\nDimension Mismatch\\nTo perform matrix multiplication, the number of columns in the 1st matrix should match the number of rows in the 2nd matrix. You can rearrange the order to make sure that this satisfies the condition.\\nAdded by Leah Gotladera',\n",
" 'document': '96076a1a',\n",
" 'question': 'Why is the order of matrices important in matrix multiplication?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To check if the dimensions of two matrices are compatible for multiplication, you need to ensure that the number of columns in the first matrix matches the number of rows in the second matrix. If you encounter a dimension mismatch, consider rearranging the order of the matrices to satisfy this condition.',\n",
" 'answer_orig': 'First of all use np.dot for matrix multiplication. When you compute matrix-matrix multiplication you should understand that order of multiplying is crucial and affects the result of the multiplication!\\nDimension Mismatch\\nTo perform matrix multiplication, the number of columns in the 1st matrix should match the number of rows in the 2nd matrix. You can rearrange the order to make sure that this satisfies the condition.\\nAdded by Leah Gotladera',\n",
" 'document': '96076a1a',\n",
" 'question': 'How can I check if the dimensions of two matrices are compatible for multiplication?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The number of columns in the first matrix should match the number of rows in the second matrix when doing matrix multiplication.',\n",
" 'answer_orig': 'First of all use np.dot for matrix multiplication. When you compute matrix-matrix multiplication you should understand that order of multiplying is crucial and affects the result of the multiplication!\\nDimension Mismatch\\nTo perform matrix multiplication, the number of columns in the 1st matrix should match the number of rows in the 2nd matrix. You can rearrange the order to make sure that this satisfies the condition.\\nAdded by Leah Gotladera',\n",
" 'document': '96076a1a',\n",
" 'question': 'What should the number of columns in the first matrix match when doing matrix multiplication?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The information on avoiding Value errors with array shapes was provided by Leah Gotladera.',\n",
" 'answer_orig': 'First of all use np.dot for matrix multiplication. When you compute matrix-matrix multiplication you should understand that order of multiplying is crucial and affects the result of the multiplication!\\nDimension Mismatch\\nTo perform matrix multiplication, the number of columns in the 1st matrix should match the number of rows in the 2nd matrix. You can rearrange the order to make sure that this satisfies the condition.\\nAdded by Leah Gotladera',\n",
" 'document': '96076a1a',\n",
" 'question': 'Who provided the information on avoiding Value errors with array shapes?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To handle NaN values in a column, you can replace them with the average of that column. First, calculate the average and save it to a variable. Then, replace the NaN values with this average variable. This process is known as imputing, and it allows you to retain rows with valuable information despite the presence of NaN values.',\n",
" 'answer_orig': 'You would first get the average of the column and save it to a variable, then replace the NaN values with the average variable.\\nThis method is called imputing - when you have NaN/ null values in a column, but you do not want to get rid of the row because it has valuable information contributing to other columns.\\nAdded by Anneysha Sarkar',\n",
" 'document': '3218389a',\n",
" 'question': 'How do I handle NaN values in a column?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'We should keep rows with NaN values because they may contain valuable information that contributes to other columns in the dataset. Instead of discarding these rows, we can impute the NaN values (for example, by replacing them with the average of the column) to retain that useful information while still making the dataset usable for analysis.',\n",
" 'answer_orig': 'You would first get the average of the column and save it to a variable, then replace the NaN values with the average variable.\\nThis method is called imputing - when you have NaN/ null values in a column, but you do not want to get rid of the row because it has valuable information contributing to other columns.\\nAdded by Anneysha Sarkar',\n",
" 'document': '3218389a',\n",
" 'question': 'Why should we keep rows with NaN values?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"In machine learning, 'imputing' refers to the method of replacing NaN (null) values in a dataset with a specific value, often the average of the column. This is done instead of removing rows with NaN values to retain valuable information that may contribute to other columns.\",\n",
" 'answer_orig': 'You would first get the average of the column and save it to a variable, then replace the NaN values with the average variable.\\nThis method is called imputing - when you have NaN/ null values in a column, but you do not want to get rid of the row because it has valuable information contributing to other columns.\\nAdded by Anneysha Sarkar',\n",
" 'document': '3218389a',\n",
" 'question': \"What is meant by 'imputing' in machine learning?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Before replacing NaN values, you need to first calculate the average of the column where the NaN values are present and save it to a variable. Then, you can use that average variable to replace the NaN values. This process is known as imputing.',\n",
" 'answer_orig': 'You would first get the average of the column and save it to a variable, then replace the NaN values with the average variable.\\nThis method is called imputing - when you have NaN/ null values in a column, but you do not want to get rid of the row because it has valuable information contributing to other columns.\\nAdded by Anneysha Sarkar',\n",
" 'document': '3218389a',\n",
" 'question': 'What needs to be done before replacing NaN values?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Replacing NaN values with the column average is done through a method called imputing. This is important because it allows you to retain rows that contain valuable information for other columns, rather than discarding those rows just because they have NaN values. By replacing the NaN values with the average of the column, you maintain the integrity of your dataset while addressing the missing data issue.',\n",
" 'answer_orig': 'You would first get the average of the column and save it to a variable, then replace the NaN values with the average variable.\\nThis method is called imputing - when you have NaN/ null values in a column, but you do not want to get rid of the row because it has valuable information contributing to other columns.\\nAdded by Anneysha Sarkar',\n",
" 'document': '3218389a',\n",
" 'question': 'Why replace NaN values with the column average?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The mathematical formula for linear regression mentioned in Question 7 can be summarized as follows: a matrix \\\\( X \\\\) is multiplied by some unknown weights \\\\( w \\\\) resulting in the target \\\\( y \\\\).',\n",
" 'answer_orig': 'In Question 7 we are asked to calculate\\nThe initial problem can be solved by this, where a Matrix X is multiplied by some unknown weights w resulting in the target y.\\nAdditional reading and videos:\\nOrdinary least squares\\nMultiple Linear Regression in Matrix Form\\nPseudoinverse Solution to OLS\\nAdded by Sylvia Schmitt\\nwith commends from Dmytro Durach',\n",
" 'document': '183a1c90',\n",
" 'question': 'What is the mathematical formula for linear regression mentioned in Question 7?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To solve the initial problem involving a Matrix X in linear regression, you need to multiply the Matrix X by some unknown weights w to obtain the target y. This process is fundamental to setting up the linear regression model. For further insights, you may want to explore additional readings such as \"Ordinary least squares,\" \"Multiple Linear Regression in Matrix Form,\" and \"Pseudoinverse Solution to OLS.\"',\n",
" 'answer_orig': 'In Question 7 we are asked to calculate\\nThe initial problem can be solved by this, where a Matrix X is multiplied by some unknown weights w resulting in the target y.\\nAdditional reading and videos:\\nOrdinary least squares\\nMultiple Linear Regression in Matrix Form\\nPseudoinverse Solution to OLS\\nAdded by Sylvia Schmitt\\nwith commends from Dmytro Durach',\n",
" 'document': '183a1c90',\n",
" 'question': 'How can I solve the initial problem involving a Matrix X in linear regression?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The additional reading recommended for understanding ordinary least squares includes:\\n\\n- Ordinary least squares\\n- Multiple Linear Regression in Matrix Form\\n- Pseudoinverse Solution to OLS\\n\\nThese materials were added by Sylvia Schmitt and received comments from Dmytro Durach.',\n",
" 'answer_orig': 'In Question 7 we are asked to calculate\\nThe initial problem can be solved by this, where a Matrix X is multiplied by some unknown weights w resulting in the target y.\\nAdditional reading and videos:\\nOrdinary least squares\\nMultiple Linear Regression in Matrix Form\\nPseudoinverse Solution to OLS\\nAdded by Sylvia Schmitt\\nwith commends from Dmytro Durach',\n",
" 'document': '183a1c90',\n",
" 'question': 'What additional reading is recommended for understanding ordinary least squares?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find more resources about multiple linear regression in matrix form in the additional reading and videos section of the course material. Here are some specific resources mentioned: \\n\\n- Ordinary least squares\\n- Multiple Linear Regression in Matrix Form\\n- Pseudoinverse Solution to OLS\\n\\nThese resources will provide you with valuable information on the topic.',\n",
" 'answer_orig': 'In Question 7 we are asked to calculate\\nThe initial problem can be solved by this, where a Matrix X is multiplied by some unknown weights w resulting in the target y.\\nAdditional reading and videos:\\nOrdinary least squares\\nMultiple Linear Regression in Matrix Form\\nPseudoinverse Solution to OLS\\nAdded by Sylvia Schmitt\\nwith commends from Dmytro Durach',\n",
" 'document': '183a1c90',\n",
" 'question': 'Where can I find more resources about multiple linear regression in matrix form?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The information about the mathematical formula for linear regression was added by Sylvia Schmitt, with comments from Dmytro Durach.',\n",
" 'answer_orig': 'In Question 7 we are asked to calculate\\nThe initial problem can be solved by this, where a Matrix X is multiplied by some unknown weights w resulting in the target y.\\nAdditional reading and videos:\\nOrdinary least squares\\nMultiple Linear Regression in Matrix Form\\nPseudoinverse Solution to OLS\\nAdded by Sylvia Schmitt\\nwith commends from Dmytro Durach',\n",
" 'document': '183a1c90',\n",
" 'question': 'Who added the information about the mathematical formula for linear regression?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The final multiplication does not have 5 columns in section 1 because it is likely that the first step of the multiplication was interchanged, which would lead to an incorrect shape of the resulting matrix.',\n",
" 'answer_orig': 'This is most likely that you interchanged the first step of the multiplication\\nYou used instead of\\nAdded by Emmanuel Ikpesu',\n",
" 'document': 'f0bc1c19',\n",
" 'question': 'Why does the final multiplication not have 5 columns in section 1?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The issue with the final multiplication missing columns is most likely caused by interchanging the first step of the multiplication. This can lead to incorrect dimensions in the resulting matrix.',\n",
" 'answer_orig': 'This is most likely that you interchanged the first step of the multiplication\\nYou used instead of\\nAdded by Emmanuel Ikpesu',\n",
" 'document': 'f0bc1c19',\n",
" 'question': 'What likely caused the issue with the final multiplication missing columns?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, interchanging the first step of multiplication can cause errors. In matrix multiplication, the order is important, as changing the order results in matrices of different sizes and usually different values. This can lead to errors if the dimensions do not align correctly for the intended calculation.',\n",
" 'answer_orig': 'This is most likely that you interchanged the first step of the multiplication\\nYou used instead of\\nAdded by Emmanuel Ikpesu',\n",
" 'document': 'f0bc1c19',\n",
" 'question': 'Can interchanging the first step of multiplication cause errors?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The note about the multiplication step error was added by Emmanuel Ikpesu.',\n",
" 'answer_orig': 'This is most likely that you interchanged the first step of the multiplication\\nYou used instead of\\nAdded by Emmanuel Ikpesu',\n",
" 'document': 'f0bc1c19',\n",
" 'question': 'Who added the note about the multiplication step error?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'If your final multiplication in the first section is incorrect, you should check if you interchanged the first step of the multiplication. This may cause issues such as not having the correct number of columns. Additionally, remember that the order of multiplication is crucial when performing matrix operations, as it affects the result. Make sure the number of columns in the first matrix matches the number of rows in the second matrix for proper matrix multiplication.',\n",
" 'answer_orig': 'This is most likely that you interchanged the first step of the multiplication\\nYou used instead of\\nAdded by Emmanuel Ikpesu',\n",
" 'document': 'f0bc1c19',\n",
" 'question': 'What should I check if my final multiplication is incorrect in the first section?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'When performing matrix-matrix multiplication in Python, the preferred operators are `@` or `np.matmul()`.',\n",
" 'answer_orig': 'Note, that matrix multiplication (matrix-matrix, matrix-vector multiplication) can be written as * operator in some sources, but performed as @ operator or np.matmul() via numpy. * operator performs element-wise multiplication (Hadamard product).\\nnumpy.dot() or ndarray.dot() can be used, but for matrix-matrix multiplication @ or np.matmul() is preferred (as per numpy doc).\\nIf multiplying by a scalar numpy.multiply() or * is preferred.\\nAdded by Andrii Larkin',\n",
" 'document': '735e6c78',\n",
" 'question': 'When performing matrix-matrix multiplication in Python, which operator is preferred?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Matrix-vector multiplication can be written as the * operator in some sources, but it is performed as the @ operator or np.matmul() via numpy. It is important to note that the * operator performs element-wise multiplication (Hadamard product).',\n",
" 'answer_orig': 'Note, that matrix multiplication (matrix-matrix, matrix-vector multiplication) can be written as * operator in some sources, but performed as @ operator or np.matmul() via numpy. * operator performs element-wise multiplication (Hadamard product).\\nnumpy.dot() or ndarray.dot() can be used, but for matrix-matrix multiplication @ or np.matmul() is preferred (as per numpy doc).\\nIf multiplying by a scalar numpy.multiply() or * is preferred.\\nAdded by Andrii Larkin',\n",
" 'document': '735e6c78',\n",
" 'question': 'How can matrix-vector multiplication be written in some sources?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The * symbol in numpy performs element-wise multiplication (Hadamard product).',\n",
" 'answer_orig': 'Note, that matrix multiplication (matrix-matrix, matrix-vector multiplication) can be written as * operator in some sources, but performed as @ operator or np.matmul() via numpy. * operator performs element-wise multiplication (Hadamard product).\\nnumpy.dot() or ndarray.dot() can be used, but for matrix-matrix multiplication @ or np.matmul() is preferred (as per numpy doc).\\nIf multiplying by a scalar numpy.multiply() or * is preferred.\\nAdded by Andrii Larkin',\n",
" 'document': '735e6c78',\n",
" 'question': 'What operator does the * symbol perform in numpy?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'For scalar multiplication in numpy, you can use `numpy.multiply()` or the `*` operator.',\n",
" 'answer_orig': 'Note, that matrix multiplication (matrix-matrix, matrix-vector multiplication) can be written as * operator in some sources, but performed as @ operator or np.matmul() via numpy. * operator performs element-wise multiplication (Hadamard product).\\nnumpy.dot() or ndarray.dot() can be used, but for matrix-matrix multiplication @ or np.matmul() is preferred (as per numpy doc).\\nIf multiplying by a scalar numpy.multiply() or * is preferred.\\nAdded by Andrii Larkin',\n",
" 'document': '735e6c78',\n",
" 'question': 'What functions can be used for scalar multiplication in numpy?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The information about multiplication operators was provided by Andrii Larkin.',\n",
" 'answer_orig': 'Note, that matrix multiplication (matrix-matrix, matrix-vector multiplication) can be written as * operator in some sources, but performed as @ operator or np.matmul() via numpy. * operator performs element-wise multiplication (Hadamard product).\\nnumpy.dot() or ndarray.dot() can be used, but for matrix-matrix multiplication @ or np.matmul() is preferred (as per numpy doc).\\nIf multiplying by a scalar numpy.multiply() or * is preferred.\\nAdded by Andrii Larkin',\n",
" 'document': '735e6c78',\n",
" 'question': 'Who provided the information about multiplication operators?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'If you encounter an ImportError related to Jinja2 when launching a new Jupyter notebook in a new environment, you should switch to the main environment and run the following command: \\n\\n```\\npip install nbconvert --upgrade\\n```',\n",
" 'answer_orig': 'If you face an error kind of ImportError: cannot import name \\'contextfilter\\' from \\'jinja2\\' (anaconda\\\\lib\\\\site-packages\\\\jinja2\\\\__init__.py) when launching a new notebook for a brand new environment.\\nSwitch to the main environment and run \"pip install nbconvert --upgrade\".\\nAdded by George Chizhmak',\n",
" 'document': 'b8ca1cd3',\n",
" 'question': 'What should I do if I encounter an ImportError when launching a new Jupyter notebook in a new environment?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To resolve the error related to 'contextfilter' from 'jinja2' when starting a Jupyter notebook, switch to the main environment and run the command `pip install nbconvert --upgrade`.\",\n",
" 'answer_orig': 'If you face an error kind of ImportError: cannot import name \\'contextfilter\\' from \\'jinja2\\' (anaconda\\\\lib\\\\site-packages\\\\jinja2\\\\__init__.py) when launching a new notebook for a brand new environment.\\nSwitch to the main environment and run \"pip install nbconvert --upgrade\".\\nAdded by George Chizhmak',\n",
" 'document': 'b8ca1cd3',\n",
" 'question': \"How can I resolve an error related to 'contextfilter' from 'jinja2' when starting a Jupyter notebook?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To fix the ImportError involving 'contextfilter' from 'jinja2', you should switch to the main environment and run the command:\\n\\n```\\npip install nbconvert --upgrade\\n```\",\n",
" 'answer_orig': 'If you face an error kind of ImportError: cannot import name \\'contextfilter\\' from \\'jinja2\\' (anaconda\\\\lib\\\\site-packages\\\\jinja2\\\\__init__.py) when launching a new notebook for a brand new environment.\\nSwitch to the main environment and run \"pip install nbconvert --upgrade\".\\nAdded by George Chizhmak',\n",
" 'document': 'b8ca1cd3',\n",
" 'question': \"What command should I run to fix the ImportError involving 'contextfilter' from 'jinja2'?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The solution for ImportError issues when launching Jupyter notebooks was added by George Chizhmak.',\n",
" 'answer_orig': 'If you face an error kind of ImportError: cannot import name \\'contextfilter\\' from \\'jinja2\\' (anaconda\\\\lib\\\\site-packages\\\\jinja2\\\\__init__.py) when launching a new notebook for a brand new environment.\\nSwitch to the main environment and run \"pip install nbconvert --upgrade\".\\nAdded by George Chizhmak',\n",
" 'document': 'b8ca1cd3',\n",
" 'question': 'Who added the solution for ImportError issues when launching Jupyter notebooks?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To solve the \\'contextfilter\\' ImportError in Jupyter notebook, you need to switch to the main environment and run the command: \"pip install nbconvert --upgrade\".',\n",
" 'answer_orig': 'If you face an error kind of ImportError: cannot import name \\'contextfilter\\' from \\'jinja2\\' (anaconda\\\\lib\\\\site-packages\\\\jinja2\\\\__init__.py) when launching a new notebook for a brand new environment.\\nSwitch to the main environment and run \"pip install nbconvert --upgrade\".\\nAdded by George Chizhmak',\n",
" 'document': 'b8ca1cd3',\n",
" 'question': \"Which package do I need to upgrade to solve the 'contextfilter' ImportError in Jupyter notebook?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'If you encounter wget hanging on MacOS Ventura M1 and see IPv6 addresses in the terminal, go to your System Settings > Network > your network connection > Details > Configure IPv6 > set to Manually > OK. After making this change, try running wget again.',\n",
" 'answer_orig': 'If you face this situation and see IPv6 addresses in the terminal, go to your System Settings > Network > your network connection > Details > Configure IPv6 > set to Manually > OK. Then try again',\n",
" 'document': 'efdb235f',\n",
" 'question': 'How do I fix wget hanging on MacOS Ventura M1?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'If `wget` shows IPv6 addresses and hangs, you should go to your System Settings > Network > your network connection > Details > Configure IPv6, set it to Manually, then click OK. After doing this, try using `wget` again.',\n",
" 'answer_orig': 'If you face this situation and see IPv6 addresses in the terminal, go to your System Settings > Network > your network connection > Details > Configure IPv6 > set to Manually > OK. Then try again',\n",
" 'document': 'efdb235f',\n",
" 'question': 'What should I do if wget shows IPv6 addresses and hangs?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To configure IPv6 settings on MacOS Ventura M1, go to your System Settings, then select Network. Choose your network connection, click on Details, and under Configure IPv6, set it to Manually. Finally, click OK and try your action again.',\n",
" 'answer_orig': 'If you face this situation and see IPv6 addresses in the terminal, go to your System Settings > Network > your network connection > Details > Configure IPv6 > set to Manually > OK. Then try again',\n",
" 'document': 'efdb235f',\n",
" 'question': 'How do I configure IPv6 settings on MacOS Ventura M1?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To set IPv6 to Manually on MacOS Ventura, go to your System Settings > Network > your network connection > Details > Configure IPv6 > set to Manually > OK.',\n",
" 'answer_orig': 'If you face this situation and see IPv6 addresses in the terminal, go to your System Settings > Network > your network connection > Details > Configure IPv6 > set to Manually > OK. Then try again',\n",
" 'document': 'efdb235f',\n",
" 'question': 'Where can I find the option to set IPv6 to Manually on MacOS Ventura?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Wget does not come pre-installed with macOS, including MacOS M1. If you are facing issues with wget, you can use alternatives such as curl to download files. The recommended command to use with curl is:\\n\\n```bash\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n```\\n\\nIf you specifically want to use wget, you can install it using Homebrew by running:\\n\\n```bash\\nbrew install wget\\n```\\n\\nThis should help resolve any issues you're experiencing with wget on your MacOS M1 device.\",\n",
" 'answer_orig': 'If you face this situation and see IPv6 addresses in the terminal, go to your System Settings > Network > your network connection > Details > Configure IPv6 > set to Manually > OK. Then try again',\n",
" 'document': 'efdb235f',\n",
" 'question': 'Is there a network setting adjustment for wget issues on MacOS M1?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'If you are using macOS and having trouble with Wget, you can use `curl` as an alternative. Here is an example of how to use it:\\n\\n```bash\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n```\\n\\n`curl` is a utility for retrieving information from the internet, and this command will store the downloaded file with the name you provide.',\n",
" 'answer_orig': \"Wget doesn't ship with macOS, so there are other alternatives to use.\\nNo worries, we got curl:\\nexample:\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nExplanations:\\ncurl: a utility for retrieving information from the internet.\\n-o: Tell it to store the result as a file.\\nfilename: You choose the file's name.\\nLinks: Put the web address (URL) here, and cURL will extract data from it and save it under the name you provide.\\nMore about it at:\\nCurl Documentation\\nAdded by David Espejo\",\n",
" 'document': '355348f0',\n",
" 'question': 'What alternative can I use to Wget if I am using macOS?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To download a file using curl on macOS, you can use the following command:\\n\\n```bash\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n```\\n\\nIn this command:\\n- `curl` is a utility for retrieving information from the internet.\\n- `-o` tells curl to store the result as a file.\\n- `./housing.csv` is the name you choose for the downloaded file.\\n- The URL provided is the location of the file you want to download. \\n\\nThis command will extract data from the specified URL and save it as `housing.csv` in your current directory.',\n",
" 'answer_orig': \"Wget doesn't ship with macOS, so there are other alternatives to use.\\nNo worries, we got curl:\\nexample:\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nExplanations:\\ncurl: a utility for retrieving information from the internet.\\n-o: Tell it to store the result as a file.\\nfilename: You choose the file's name.\\nLinks: Put the web address (URL) here, and cURL will extract data from it and save it under the name you provide.\\nMore about it at:\\nCurl Documentation\\nAdded by David Espejo\",\n",
" 'document': '355348f0',\n",
" 'question': 'Can you provide an example of using curl to download a file on macOS?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The -o option in the curl command is used to tell curl to store the result as a file. You can choose the filename for the saved file, and curl will extract data from the specified web address (URL) and save it under the name you provide.',\n",
" 'answer_orig': \"Wget doesn't ship with macOS, so there are other alternatives to use.\\nNo worries, we got curl:\\nexample:\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nExplanations:\\ncurl: a utility for retrieving information from the internet.\\n-o: Tell it to store the result as a file.\\nfilename: You choose the file's name.\\nLinks: Put the web address (URL) here, and cURL will extract data from it and save it under the name you provide.\\nMore about it at:\\nCurl Documentation\\nAdded by David Espejo\",\n",
" 'document': '355348f0',\n",
" 'question': 'What does the -o option do in the curl command?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To specify the name of the file when using curl on macOS, you can use the `-o` option followed by the desired filename. For example, the command would look like this: \\n\\n```bash\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n```\\n\\nIn this command, `-o` tells curl to store the result as a file, and `housing.csv` is the name you choose for the file.',\n",
" 'answer_orig': \"Wget doesn't ship with macOS, so there are other alternatives to use.\\nNo worries, we got curl:\\nexample:\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nExplanations:\\ncurl: a utility for retrieving information from the internet.\\n-o: Tell it to store the result as a file.\\nfilename: You choose the file's name.\\nLinks: Put the web address (URL) here, and cURL will extract data from it and save it under the name you provide.\\nMore about it at:\\nCurl Documentation\\nAdded by David Espejo\",\n",
" 'document': '355348f0',\n",
" 'question': 'How do you specify the name of the file when using curl on macOS?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find more information about using curl in the Curl Documentation, as mentioned in the context provided.',\n",
" 'answer_orig': \"Wget doesn't ship with macOS, so there are other alternatives to use.\\nNo worries, we got curl:\\nexample:\\ncurl -o ./housing.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nExplanations:\\ncurl: a utility for retrieving information from the internet.\\n-o: Tell it to store the result as a file.\\nfilename: You choose the file's name.\\nLinks: Put the web address (URL) here, and cURL will extract data from it and save it under the name you provide.\\nMore about it at:\\nCurl Documentation\\nAdded by David Espejo\",\n",
" 'document': '355348f0',\n",
" 'question': 'Where can I find more information about using curl?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"You can use the `round()` function or f-strings to limit the number of decimal places. For example, you can round a number to 4 decimal places using `round(number, 4)`. Alternatively, you can use f-strings like `print(f'Average mark for the Homework is {avg:.3f}')` to format a number to 3 decimal places. Additionally, if you want to round values in a whole Series, you can use `pandas.Series.round()`. Please refer to the documentation for more details: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round.\",\n",
" 'answer_orig': \"You can use round() function or f-strings\\nround(number, 4) - this will round number up to 4 decimal places\\nprint(f'Average mark for the Homework is {avg:.3f}') - using F string\\nAlso there is pandas.Series. round idf you need to round values in the whole Series\\nPlease check the documentation\\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round\\nAdded by Olga Rudakova\",\n",
" 'document': '67afabf5',\n",
" 'question': 'What function can I use to limit a number of decimal places?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To round a number to 4 decimal places in Python, you can use the `round()` function like this:\\n\\n```python\\nrounded_number = round(number, 4)\\n```\\n\\nAdditionally, if you want to format a number while printing, you can use f-strings:\\n\\n```python\\nprint(f'Your number is {number:.4f}')\\n```\\n\\nIf you're working with a pandas Series and want to round all values in the Series, you can utilize the `round()` method available in pandas. For more details, check the pandas documentation at [pandas.Series.round](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round).\",\n",
" 'answer_orig': \"You can use round() function or f-strings\\nround(number, 4) - this will round number up to 4 decimal places\\nprint(f'Average mark for the Homework is {avg:.3f}') - using F string\\nAlso there is pandas.Series. round idf you need to round values in the whole Series\\nPlease check the documentation\\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round\\nAdded by Olga Rudakova\",\n",
" 'document': '67afabf5',\n",
" 'question': 'How can I round a number to 4 decimal places in Python?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To format a number to 3 decimal places using f-strings, you can use the following syntax:\\n\\n```python\\nprint(f'Average mark for the Homework is {avg:.3f}')\\n```\\n\\nThis will display the value of `avg` rounded to 3 decimal places.\",\n",
" 'answer_orig': \"You can use round() function or f-strings\\nround(number, 4) - this will round number up to 4 decimal places\\nprint(f'Average mark for the Homework is {avg:.3f}') - using F string\\nAlso there is pandas.Series. round idf you need to round values in the whole Series\\nPlease check the documentation\\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round\\nAdded by Olga Rudakova\",\n",
" 'document': '67afabf5',\n",
" 'question': 'Can you demonstrate using f-strings to format a number to 3 decimal places?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there is a function to round an entire Series in pandas. You can use the `pandas.Series.round()` method to round the values in the whole Series. For more details, you can check the documentation [here](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round).',\n",
" 'answer_orig': \"You can use round() function or f-strings\\nround(number, 4) - this will round number up to 4 decimal places\\nprint(f'Average mark for the Homework is {avg:.3f}') - using F string\\nAlso there is pandas.Series. round idf you need to round values in the whole Series\\nPlease check the documentation\\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round\\nAdded by Olga Rudakova\",\n",
" 'document': '67afabf5',\n",
" 'question': 'Is there a function to round an entire Series in pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find more information about rounding values in pandas Series by checking the documentation for the `pandas.Series.round` function at the following link: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round. This function allows you to round the values in the entire Series.',\n",
" 'answer_orig': \"You can use round() function or f-strings\\nround(number, 4) - this will round number up to 4 decimal places\\nprint(f'Average mark for the Homework is {avg:.3f}') - using F string\\nAlso there is pandas.Series. round idf you need to round values in the whole Series\\nPlease check the documentation\\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.round.html#pandas.Series.round\\nAdded by Olga Rudakova\",\n",
" 'document': '67afabf5',\n",
" 'question': 'Where can I find more information about rounding values in pandas Series?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can ask questions for the Live Sessions for Week 2 at the following link: [Ask questions for Live Sessions](https://app.sli.do/event/vsUpjYsayZ8A875Hq8dpUa/live/questions).',\n",
" 'answer_orig': 'Here are the crucial links for this Week 2 that starts September 18, 2023\\nAsk questions for Live Sessions: https://app.sli.do/event/vsUpjYsayZ8A875Hq8dpUa/live/questions\\nCalendar for weekly meetings: https://calendar.google.com/calendar/u/0/r?cid=cGtjZ2tkbGc1OG9yb2lxa2Vwc2g4YXMzMmNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ&pli=1\\nWeek 2 HW: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/02-regression/homework.md\\nSubmit HW Week 2: https://docs.google.com/forms/d/e/1FAIpQLSf8eMtnErPFqzzFsEdLap_GZ2sMih-H-Y7F_IuPGqt4fOmOJw/viewform (also available at the bottom of the above link)\\nAll HWs: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/\\nGitHub for theory: https://github.com/alexeygrigorev/mlbookcamp-code/tree/master/course-zoomcamp\\nYoutube Link: 2.X --- https://www.youtube.com/watch?v=vM3SqPNlStE&list=PL3MmuxUbc_hIhxl5Ji8t4O6lPAOpHaCLR&index=12\\nFAQs: https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit#heading=h.lpz96zg7l47j\\n~~Nukta Bhatia~~',\n",
" 'document': '50d737e7',\n",
" 'question': 'Where can I ask questions for the Live Sessions for Week 2?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The start date for Week 2 of the Machine Learning for Regression course is September 18, 2023.',\n",
" 'answer_orig': 'Here are the crucial links for this Week 2 that starts September 18, 2023\\nAsk questions for Live Sessions: https://app.sli.do/event/vsUpjYsayZ8A875Hq8dpUa/live/questions\\nCalendar for weekly meetings: https://calendar.google.com/calendar/u/0/r?cid=cGtjZ2tkbGc1OG9yb2lxa2Vwc2g4YXMzMmNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ&pli=1\\nWeek 2 HW: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/02-regression/homework.md\\nSubmit HW Week 2: https://docs.google.com/forms/d/e/1FAIpQLSf8eMtnErPFqzzFsEdLap_GZ2sMih-H-Y7F_IuPGqt4fOmOJw/viewform (also available at the bottom of the above link)\\nAll HWs: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/\\nGitHub for theory: https://github.com/alexeygrigorev/mlbookcamp-code/tree/master/course-zoomcamp\\nYoutube Link: 2.X --- https://www.youtube.com/watch?v=vM3SqPNlStE&list=PL3MmuxUbc_hIhxl5Ji8t4O6lPAOpHaCLR&index=12\\nFAQs: https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit#heading=h.lpz96zg7l47j\\n~~Nukta Bhatia~~',\n",
" 'document': '50d737e7',\n",
" 'question': 'What is the start date for Week 2 of the Machine Learning for Regression course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find the Week 2 homework for the course at the following link: [Week 2 HW](https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/02-regression/homework.md).',\n",
" 'answer_orig': 'Here are the crucial links for this Week 2 that starts September 18, 2023\\nAsk questions for Live Sessions: https://app.sli.do/event/vsUpjYsayZ8A875Hq8dpUa/live/questions\\nCalendar for weekly meetings: https://calendar.google.com/calendar/u/0/r?cid=cGtjZ2tkbGc1OG9yb2lxa2Vwc2g4YXMzMmNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ&pli=1\\nWeek 2 HW: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/02-regression/homework.md\\nSubmit HW Week 2: https://docs.google.com/forms/d/e/1FAIpQLSf8eMtnErPFqzzFsEdLap_GZ2sMih-H-Y7F_IuPGqt4fOmOJw/viewform (also available at the bottom of the above link)\\nAll HWs: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/\\nGitHub for theory: https://github.com/alexeygrigorev/mlbookcamp-code/tree/master/course-zoomcamp\\nYoutube Link: 2.X --- https://www.youtube.com/watch?v=vM3SqPNlStE&list=PL3MmuxUbc_hIhxl5Ji8t4O6lPAOpHaCLR&index=12\\nFAQs: https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit#heading=h.lpz96zg7l47j\\n~~Nukta Bhatia~~',\n",
" 'document': '50d737e7',\n",
" 'question': 'Where can I find the Week 2 homework for the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can submit the Week 2 homework using the following link: [Submit HW Week 2](https://docs.google.com/forms/d/e/1FAIpQLSf8eMtnErPFqzzFsEdLap_GZ2sMih-H-Y7F_IuPGqt4fOmOJw/viewform). This link is also available at the bottom of the Week 2 homework page: [Week 2 HW](https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/02-regression/homework.md).',\n",
" 'answer_orig': 'Here are the crucial links for this Week 2 that starts September 18, 2023\\nAsk questions for Live Sessions: https://app.sli.do/event/vsUpjYsayZ8A875Hq8dpUa/live/questions\\nCalendar for weekly meetings: https://calendar.google.com/calendar/u/0/r?cid=cGtjZ2tkbGc1OG9yb2lxa2Vwc2g4YXMzMmNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ&pli=1\\nWeek 2 HW: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/02-regression/homework.md\\nSubmit HW Week 2: https://docs.google.com/forms/d/e/1FAIpQLSf8eMtnErPFqzzFsEdLap_GZ2sMih-H-Y7F_IuPGqt4fOmOJw/viewform (also available at the bottom of the above link)\\nAll HWs: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/\\nGitHub for theory: https://github.com/alexeygrigorev/mlbookcamp-code/tree/master/course-zoomcamp\\nYoutube Link: 2.X --- https://www.youtube.com/watch?v=vM3SqPNlStE&list=PL3MmuxUbc_hIhxl5Ji8t4O6lPAOpHaCLR&index=12\\nFAQs: https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit#heading=h.lpz96zg7l47j\\n~~Nukta Bhatia~~',\n",
" 'document': '50d737e7',\n",
" 'question': 'How can I submit the Week 2 homework?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can check the calendar for the weekly meetings at this link: [Calendar for weekly meetings](https://calendar.google.com/calendar/u/0/r?cid=cGtjZ2tkbGc1OG9yb2lxa2Vwc2g4YXMzMmNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ&pli=1).',\n",
" 'answer_orig': 'Here are the crucial links for this Week 2 that starts September 18, 2023\\nAsk questions for Live Sessions: https://app.sli.do/event/vsUpjYsayZ8A875Hq8dpUa/live/questions\\nCalendar for weekly meetings: https://calendar.google.com/calendar/u/0/r?cid=cGtjZ2tkbGc1OG9yb2lxa2Vwc2g4YXMzMmNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ&pli=1\\nWeek 2 HW: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/02-regression/homework.md\\nSubmit HW Week 2: https://docs.google.com/forms/d/e/1FAIpQLSf8eMtnErPFqzzFsEdLap_GZ2sMih-H-Y7F_IuPGqt4fOmOJw/viewform (also available at the bottom of the above link)\\nAll HWs: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/cohorts/2023/\\nGitHub for theory: https://github.com/alexeygrigorev/mlbookcamp-code/tree/master/course-zoomcamp\\nYoutube Link: 2.X --- https://www.youtube.com/watch?v=vM3SqPNlStE&list=PL3MmuxUbc_hIhxl5Ji8t4O6lPAOpHaCLR&index=12\\nFAQs: https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit#heading=h.lpz96zg7l47j\\n~~Nukta Bhatia~~',\n",
" 'document': '50d737e7',\n",
" 'question': 'Where can I check the calendar for the weekly meetings?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To visualize the distribution of the `median_house_value` in the housing dataset, you can use a histogram. Here\\'s an example of how to do this using Python:\\n\\n```python\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nimport seaborn as sns\\n\\n# Load the data\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\ndf = pd.read_csv(url)\\n\\n# EDA\\nsns.histplot(df[\\'median_house_value\\'], kde=False)\\nplt.show()\\n```\\n\\nAlternatively, you can check the skewness of the `median_house_value` and use the `describe` method:\\n\\n```python\\n# Check skewness and describe\\nprint(df[\\'median_house_value\\'].describe())\\n\\n# Calculate the skewness of the \\'median_house_value\\' variable\\nskewness = df[\\'median_house_value\\'].skew()\\nprint(\"Skewness of \\'median_house_value\\':\", skewness)\\n```',\n",
" 'answer_orig': 'We can use histogram:\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nimport seaborn as sns\\n# Load the data\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\ndf = pd.read_csv(url)\\n# EDA\\nsns.histplot(df[\\'median_house_value\\'], kde=False)\\nplt.show()\\nOR ceck skewness and describe:\\nprint(df[\\'median_house_value\\'].describe())\\n# Calculate the skewness of the \\'median_house_value\\' variable\\nskewness = df[\\'median_house_value\\'].skew()\\n# Print the skewness value\\nprint(\"Skewness of \\'median_house_value\\':\", skewness)\\n(Mohammad Emad Sharifi)',\n",
" 'document': 'bbc0fca3',\n",
" 'question': 'How can we visualize the distribution of the median_house_value in the housing dataset?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To check the skewness of the `median_house_value` variable from the dataset, you can use the following method:\\n\\n1. Calculate the skewness of the variable using the `skew()` function in pandas:\\n ```python\\n skewness = df[\\'median_house_value\\'].skew()\\n ```\\n\\n2. Print the skewness value:\\n ```python\\n print(\"Skewness of \\'median_house_value\\':\", skewness)\\n ```\\n\\nAdditionally, you can visualize the distribution of the variable using a histogram with seaborn:\\n```python\\nimport seaborn as sns\\nimport matplotlib.pyplot as plt\\n\\nsns.histplot(df[\\'median_house_value\\'], kde=False)\\nplt.show()\\n```\\n\\nThese methods will help you check the skewness of the `median_house_value` variable in the dataset.',\n",
" 'answer_orig': 'We can use histogram:\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nimport seaborn as sns\\n# Load the data\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\ndf = pd.read_csv(url)\\n# EDA\\nsns.histplot(df[\\'median_house_value\\'], kde=False)\\nplt.show()\\nOR ceck skewness and describe:\\nprint(df[\\'median_house_value\\'].describe())\\n# Calculate the skewness of the \\'median_house_value\\' variable\\nskewness = df[\\'median_house_value\\'].skew()\\n# Print the skewness value\\nprint(\"Skewness of \\'median_house_value\\':\", skewness)\\n(Mohammad Emad Sharifi)',\n",
" 'document': 'bbc0fca3',\n",
" 'question': 'What is a method to check the skewness of the median_house_value variable from the dataset?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To generate a histogram for the `median_house_value`, the following Python libraries are needed:\\n\\n1. `pandas` - for data manipulation and analysis.\\n2. `matplotlib.pyplot` - for plotting graphs.\\n3. `seaborn` - for advanced data visualization.\\n\\nYou can use them as shown in the example:\\n\\n```python\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nimport seaborn as sns\\n```',\n",
" 'answer_orig': 'We can use histogram:\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nimport seaborn as sns\\n# Load the data\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\ndf = pd.read_csv(url)\\n# EDA\\nsns.histplot(df[\\'median_house_value\\'], kde=False)\\nplt.show()\\nOR ceck skewness and describe:\\nprint(df[\\'median_house_value\\'].describe())\\n# Calculate the skewness of the \\'median_house_value\\' variable\\nskewness = df[\\'median_house_value\\'].skew()\\n# Print the skewness value\\nprint(\"Skewness of \\'median_house_value\\':\", skewness)\\n(Mohammad Emad Sharifi)',\n",
" 'document': 'bbc0fca3',\n",
" 'question': 'Which Python libraries are needed to generate a histogram for the median_house_value?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"The command `sns.histplot(df['median_house_value'], kde=False)` creates a histogram of the 'median_house_value' column from the DataFrame `df`. The parameter `kde=False` indicates that no Kernel Density Estimate (KDE) curve will be added to the plot. This visual representation helps to understand the distribution and potential long tail of the 'median_house_value' data.\",\n",
" 'answer_orig': 'We can use histogram:\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nimport seaborn as sns\\n# Load the data\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\ndf = pd.read_csv(url)\\n# EDA\\nsns.histplot(df[\\'median_house_value\\'], kde=False)\\nplt.show()\\nOR ceck skewness and describe:\\nprint(df[\\'median_house_value\\'].describe())\\n# Calculate the skewness of the \\'median_house_value\\' variable\\nskewness = df[\\'median_house_value\\'].skew()\\n# Print the skewness value\\nprint(\"Skewness of \\'median_house_value\\':\", skewness)\\n(Mohammad Emad Sharifi)',\n",
" 'document': 'bbc0fca3',\n",
" 'question': \"What does the command sns.histplot(df['median_house_value'], kde=False) do?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Checking the distribution or skewness of the target variable is important in machine learning for regression because it helps to understand the characteristics of the data set. If the target variable is highly skewed, transforming it using logarithm distribution can be beneficial. This transformation can help to stabilize variance and make the model's assumptions more valid, thus potentially improving the performance of the regression model. Plotting the distribution of the target variable is the easiest way to evaluate its skewness.\",\n",
" 'answer_orig': 'We can use histogram:\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nimport seaborn as sns\\n# Load the data\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\ndf = pd.read_csv(url)\\n# EDA\\nsns.histplot(df[\\'median_house_value\\'], kde=False)\\nplt.show()\\nOR ceck skewness and describe:\\nprint(df[\\'median_house_value\\'].describe())\\n# Calculate the skewness of the \\'median_house_value\\' variable\\nskewness = df[\\'median_house_value\\'].skew()\\n# Print the skewness value\\nprint(\"Skewness of \\'median_house_value\\':\", skewness)\\n(Mohammad Emad Sharifi)',\n",
" 'document': 'bbc0fca3',\n",
" 'question': 'Why is checking the distribution or skewness important in machine learning for regression?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"You might encounter a LinAlgError: Singular matrix error while following course videos due to a few reasons. Firstly, it can happen because not every matrix can be inverted, which is one of the main causes of a singular matrix error. Specifically, in the course, it often occurs when dealing with matrix multiplication, as the order in which matrices are multiplied is crucial. Make sure to respect the order of multiplication, as X.dot(Y) is not necessarily equal to Y.dot(X). \\n\\nAdditionally, if you have performed the inverse of X once in your code and attempt to do it again, that could also trigger the error. It's a normal part of the learning process, and the reason for this error will be further explained in the Regularization video.\",\n",
" 'answer_orig': 'It’s possible that when you follow the videos, you’ll get a Singular Matrix error. We will explain why it happens in the Regularization video. Don’t worry, it’s normal that you have it.\\nYou can also have an error because you did the inverse of X once in your code and you’re doing it a second time.\\n(Added by Cécile Guillot)',\n",
" 'document': '6f3bdd20',\n",
" 'question': 'Why might I encounter a LinAlgError: Singular matrix error while following course videos?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, the Regularization video will explain why the LinAlgError: Singular matrix issue happens.',\n",
" 'answer_orig': 'It’s possible that when you follow the videos, you’ll get a Singular Matrix error. We will explain why it happens in the Regularization video. Don’t worry, it’s normal that you have it.\\nYou can also have an error because you did the inverse of X once in your code and you’re doing it a second time.\\n(Added by Cécile Guillot)',\n",
" 'document': '6f3bdd20',\n",
" 'question': 'Will the Regularization video explain the LinAlgError: Singular matrix issue?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, it is normal to encounter a Singular Matrix error when following the course. It may happen during the lessons, and you will receive an explanation for it in the Regularization video. Don't worry, it's a common occurrence in the course.\",\n",
" 'answer_orig': 'It’s possible that when you follow the videos, you’ll get a Singular Matrix error. We will explain why it happens in the Regularization video. Don’t worry, it’s normal that you have it.\\nYou can also have an error because you did the inverse of X once in your code and you’re doing it a second time.\\n(Added by Cécile Guillot)',\n",
" 'document': '6f3bdd20',\n",
" 'question': 'Is it normal to get a Singular Matrix error when following the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, performing the inverse of X twice in your code can cause a LinAlgError, specifically a Singular Matrix error. This can occur because not every matrix can be inverted, and if you attempt to invert it again after already performing the inverse, it may lead to such an error.',\n",
" 'answer_orig': 'It’s possible that when you follow the videos, you’ll get a Singular Matrix error. We will explain why it happens in the Regularization video. Don’t worry, it’s normal that you have it.\\nYou can also have an error because you did the inverse of X once in your code and you’re doing it a second time.\\n(Added by Cécile Guillot)',\n",
" 'document': '6f3bdd20',\n",
" 'question': 'Can performing the inverse of X twice in my code cause a LinAlgError?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The additional information regarding the Singular Matrix error was provided by Cécile Guillot.',\n",
" 'answer_orig': 'It’s possible that when you follow the videos, you’ll get a Singular Matrix error. We will explain why it happens in the Regularization video. Don’t worry, it’s normal that you have it.\\nYou can also have an error because you did the inverse of X once in your code and you’re doing it a second time.\\n(Added by Cécile Guillot)',\n",
" 'document': '6f3bdd20',\n",
" 'question': 'Who provided the additional information regarding the Singular Matrix error?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The context provided does not specify a particular dataset used for regression in the course. However, it mentions several general sources for datasets, including Kaggle, UCI Machine Learning Repository, and others. You may need to choose an appropriate dataset from those sources for your regression tasks.',\n",
" 'answer_orig': 'You can find a detailed description of the dataset ere https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html\\nKS',\n",
" 'document': '27c2d90a',\n",
" 'question': 'What dataset is used for regression in the course?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can find a detailed description of the California housing dataset [here](https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html).',\n",
" 'answer_orig': 'You can find a detailed description of the dataset ere https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html\\nKS',\n",
" 'document': '27c2d90a',\n",
" 'question': 'Where can I find information about the California housing dataset?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, details on the dataset used in Section 2, specifically the California housing dataset, can be found in the following link: [California housing dataset](https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html).',\n",
" 'answer_orig': 'You can find a detailed description of the dataset ere https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html\\nKS',\n",
" 'document': '27c2d90a',\n",
" 'question': 'Do you provide details on the datasets used in Section 2?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, you can learn more about the California housing dataset by visiting the following link: [California housing dataset description](https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html).',\n",
" 'answer_orig': 'You can find a detailed description of the dataset ere https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html\\nKS',\n",
" 'document': '27c2d90a',\n",
" 'question': 'Is there a link to learn more about the California housing dataset?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To access the description for the dataset in Machine Learning for Regression, you will need to read the dataset directly from GitHub. You can do this by using the following code to load it into a pandas dataframe:\\n\\n```python\\ndfh = pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\n```',\n",
" 'answer_orig': 'You can find a detailed description of the dataset ere https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html\\nKS',\n",
" 'document': '27c2d90a',\n",
" 'question': 'How can I access the description for the dataset in Machine Learning for Regression?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You got NaNs after applying .mean() when using for loops to calculate RMSE because there were NaN values present in your data. This issue occurred during the mean step of your RMSE function, specifically after squaring the errors. The root of the problem was that you only applied fillna(0) to the training data, not to the validation and test datasets. After applying fillna(0) to all datasets (train, val, test), the issue was resolved, and your for loops to calculate RMSE worked correctly.',\n",
" 'answer_orig': 'I was using for loops to apply rmse to list of y_val and y_pred. But the resulting rmse is all nan.\\nI found out that the problem was when my data reached the mean step after squaring the error in the rmse function. Turned out there were nan in the array, then I traced the problem back to where I first started to split the data: I had only use fillna(0) on the train data, not on the validation and test data. So the problem was fixed after I applied fillna(0) to all the dataset (train, val, test). Voila, my for loops to get rmse from all the seed values work now.\\nAdded by Sasmito Yudha Husada',\n",
" 'document': '88e9600a',\n",
" 'question': 'Why did I get NaNs after applying .mean() when using for loops to calculate RMSE?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'I identified the cause of NaNs in my RMSE calculation by tracing the problem back to where I first started to split the data. Initially, I had used `fillna(0)` only on the train data and not on the validation and test data. This resulted in NaNs appearing in the array after squaring the errors in the RMSE function. Once I applied `fillna(0)` to all the datasets (train, validation, and test), the issue was resolved and my for loops to calculate RMSE from all the seed values worked correctly.',\n",
" 'answer_orig': 'I was using for loops to apply rmse to list of y_val and y_pred. But the resulting rmse is all nan.\\nI found out that the problem was when my data reached the mean step after squaring the error in the rmse function. Turned out there were nan in the array, then I traced the problem back to where I first started to split the data: I had only use fillna(0) on the train data, not on the validation and test data. So the problem was fixed after I applied fillna(0) to all the dataset (train, val, test). Voila, my for loops to get rmse from all the seed values work now.\\nAdded by Sasmito Yudha Husada',\n",
" 'document': '88e9600a',\n",
" 'question': 'How did you identify the cause of NaNs in your RMSE calculation?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The issue leading to NaNs in the RMSE function was due to NaN values in the data array. This occurred because the user had only applied `fillna(0)` to the training data and not to the validation and test data. Once `fillna(0)` was applied to all datasets (train, validation, and test), the NaN issue was resolved, and the RMSE calculations worked correctly.',\n",
" 'answer_orig': 'I was using for loops to apply rmse to list of y_val and y_pred. But the resulting rmse is all nan.\\nI found out that the problem was when my data reached the mean step after squaring the error in the rmse function. Turned out there were nan in the array, then I traced the problem back to where I first started to split the data: I had only use fillna(0) on the train data, not on the validation and test data. So the problem was fixed after I applied fillna(0) to all the dataset (train, val, test). Voila, my for loops to get rmse from all the seed values work now.\\nAdded by Sasmito Yudha Husada',\n",
" 'document': '88e9600a',\n",
" 'question': 'What was the issue with the data that led to NaNs in the RMSE function?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The problem of getting NaNs in the RMSE calculation was resolved by applying `fillna(0)` to all the datasets (train, validation, and test), instead of just the train data.',\n",
" 'answer_orig': 'I was using for loops to apply rmse to list of y_val and y_pred. But the resulting rmse is all nan.\\nI found out that the problem was when my data reached the mean step after squaring the error in the rmse function. Turned out there were nan in the array, then I traced the problem back to where I first started to split the data: I had only use fillna(0) on the train data, not on the validation and test data. So the problem was fixed after I applied fillna(0) to all the dataset (train, val, test). Voila, my for loops to get rmse from all the seed values work now.\\nAdded by Sasmito Yudha Husada',\n",
" 'document': '88e9600a',\n",
" 'question': 'What step resolved the problem of getting NaNs in the RMSE calculation?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You needed to apply `fillna(0)` on all datasets, including train, validation, and test, because if you only applied it to the training data, any NaNs left in the validation and test datasets would result in errors or NaN values during subsequent calculations, such as when computing metrics like RMSE. Ensuring that all datasets are filled eliminates the risk of encountering NaN values during analysis, which can disrupt the processing and evaluation of your machine learning models.',\n",
" 'answer_orig': 'I was using for loops to apply rmse to list of y_val and y_pred. But the resulting rmse is all nan.\\nI found out that the problem was when my data reached the mean step after squaring the error in the rmse function. Turned out there were nan in the array, then I traced the problem back to where I first started to split the data: I had only use fillna(0) on the train data, not on the validation and test data. So the problem was fixed after I applied fillna(0) to all the dataset (train, val, test). Voila, my for loops to get rmse from all the seed values work now.\\nAdded by Sasmito Yudha Husada',\n",
" 'document': '88e9600a',\n",
" 'question': 'Why did you need to apply fillna(0) on all datasets, including train, val, and test?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"We should transform the target variable to logarithm distribution when the target variable is highly skewed. This transformation helps to manage long tail distributions, such as prices, by stabilizing variance and making the data more normally distributed. The easiest way to evaluate this is by plotting the distribution of the target variable to assess skewness. However, it's important to note that if the target variable contains negative values, the logarithmic transformation cannot be applied.\",\n",
" 'answer_orig': 'Why should we transform the target variable to logarithm distribution? Do we do this for all machine learning projects?\\nOnly if you see that your target is highly skewed. The easiest way to evaluate this is by plotting the distribution of the target variable.\\nThis can help to understand skewness and how it can be applied to the distribution of your data set.\\nhttps://en.wikipedia.org/wiki/Skewness\\nPastor Soto',\n",
" 'document': 'd59d8df7',\n",
" 'question': 'Why should we transform the target variable to logarithm distribution?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'We do not transform the target variable to logarithm distribution for all machine learning projects. This transformation is only applied if the target variable is highly skewed. The easiest way to evaluate skewness is by plotting the distribution of the target variable. If it has a long tail distribution, like in prices, you can transform it using the np.log1p() method, but this method cannot be used if the target variable has negative values.',\n",
" 'answer_orig': 'Why should we transform the target variable to logarithm distribution? Do we do this for all machine learning projects?\\nOnly if you see that your target is highly skewed. The easiest way to evaluate this is by plotting the distribution of the target variable.\\nThis can help to understand skewness and how it can be applied to the distribution of your data set.\\nhttps://en.wikipedia.org/wiki/Skewness\\nPastor Soto',\n",
" 'document': 'd59d8df7',\n",
" 'question': 'Do we transform the target variable to logarithm distribution for all machine learning projects?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You should transform the target variable to a logarithm distribution when the target variable has a long tail distribution, such as prices, and exhibits a wide range. This can be done using the np.log1p() method. However, be cautious if your target variable has negative values, as this method will not be effective in that case. Additionally, transformation is beneficial if the target is highly skewed, which can be assessed by plotting its distribution.',\n",
" 'answer_orig': 'Why should we transform the target variable to logarithm distribution? Do we do this for all machine learning projects?\\nOnly if you see that your target is highly skewed. The easiest way to evaluate this is by plotting the distribution of the target variable.\\nThis can help to understand skewness and how it can be applied to the distribution of your data set.\\nhttps://en.wikipedia.org/wiki/Skewness\\nPastor Soto',\n",
" 'document': 'd59d8df7',\n",
" 'question': 'When should we transform the target variable?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To evaluate if the target variable is highly skewed, you can plot the distribution of the target variable using a histogram. This visual representation will help you understand the skewness of the data. Additionally, you can calculate the skewness value using the `.skew()` method in Python. A significantly skewed value indicates that the target variable may need transformation.',\n",
" 'answer_orig': 'Why should we transform the target variable to logarithm distribution? Do we do this for all machine learning projects?\\nOnly if you see that your target is highly skewed. The easiest way to evaluate this is by plotting the distribution of the target variable.\\nThis can help to understand skewness and how it can be applied to the distribution of your data set.\\nhttps://en.wikipedia.org/wiki/Skewness\\nPastor Soto',\n",
" 'document': 'd59d8df7',\n",
" 'question': 'How can we evaluate if the target variable is highly skewed?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Understanding the skewness of the target variable is important because it helps determine whether transformation, such as applying a logarithmic distribution, is needed. This is particularly relevant when the target variable has a long tail distribution, as in the case of prices. Evaluating the skewness by plotting the distribution of the target variable can guide decisions on how to transform the data, which can subsequently enhance the performance of machine learning models.',\n",
" 'answer_orig': 'Why should we transform the target variable to logarithm distribution? Do we do this for all machine learning projects?\\nOnly if you see that your target is highly skewed. The easiest way to evaluate this is by plotting the distribution of the target variable.\\nThis can help to understand skewness and how it can be applied to the distribution of your data set.\\nhttps://en.wikipedia.org/wiki/Skewness\\nPastor Soto',\n",
" 'document': 'd59d8df7',\n",
" 'question': 'Why is it important to understand the skewness of the target variable?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can read a dataset directly from GitHub into a pandas dataframe using the following code:\\n\\n```python\\ndfh = pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\n```',\n",
" 'answer_orig': 'The dataset can be read directly to pandas dataframe from the github link using the technique shown below\\ndfh=pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\nKrishna Anand',\n",
" 'document': '0b3eaf92',\n",
" 'question': 'How can I read a dataset directly from GitHub into a pandas dataframe?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can load data from a GitHub link into pandas by using the following method:\\n\\n```python\\ndfh = pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\n```\\n\\nThis reads the dataset directly into a pandas DataFrame.',\n",
" 'answer_orig': 'The dataset can be read directly to pandas dataframe from the github link using the technique shown below\\ndfh=pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\nKrishna Anand',\n",
" 'document': '0b3eaf92',\n",
" 'question': 'What method allows me to load data from a GitHub link into pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To read data from a GitHub URL using pandas, you can use the following code:\\n\\n```python\\ndf = pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\n```\\n\\nThis will directly load the dataset into a pandas dataframe from the provided GitHub link.',\n",
" 'answer_orig': 'The dataset can be read directly to pandas dataframe from the github link using the technique shown below\\ndfh=pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\nKrishna Anand',\n",
" 'document': '0b3eaf92',\n",
" 'question': 'Could you explain how to read data from a GitHub URL using pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To import a dataset from a GitHub link into a pandas dataframe, you can use the following code:\\n\\n```python\\ndfh = pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\n```',\n",
" 'answer_orig': 'The dataset can be read directly to pandas dataframe from the github link using the technique shown below\\ndfh=pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\nKrishna Anand',\n",
" 'document': '0b3eaf92',\n",
" 'question': 'What is the code to import a dataset from a GitHub link into a pandas dataframe?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To fetch data from a GitHub repository into a pandas dataframe, you can use the following method:\\n\\n```python\\ndfh = pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\n```\\n\\nAlternatively, you can also use the `requests` library to download the dataset first and then read it into a pandas dataframe:\\n\\n```python\\nimport requests\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\nif response.status_code == 200:\\n with open(\\'housing.csv\\', \\'wb\\') as file:\\n file.write(response.content)\\n \\ndf = pd.read_csv(\\'housing.csv\\')\\n```\\n\\nBoth methods will allow you to load the data from the GitHub repository into a pandas dataframe.',\n",
" 'answer_orig': 'The dataset can be read directly to pandas dataframe from the github link using the technique shown below\\ndfh=pd.read_csv(\"https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\")\\nKrishna Anand',\n",
" 'document': '0b3eaf92',\n",
" 'question': 'How do I fetch data from a GitHub repository into a pandas dataframe?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To load the dataset directly into Kaggle Notebooks, you can use the following command:\\n\\n```bash\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n```\\n\\nMake sure to include the `!` before `wget`, as it is essential. Once the dataset is loaded to the Kaggle notebook server, you can read it using the following pandas command:\\n\\n```python\\ndf = pd.read_csv('housing.csv')\\n```\",\n",
" 'answer_orig': \"For users of kaggle notebooks, the dataset can be loaded through widget using the below command. Please remember that ! before wget is essential\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nOnce the dataset is loaded to the kaggle notebook server, it can be read through the below pandas command\\ndf = pd.read_csv('housing.csv')\\nHarish Balasundaram\",\n",
" 'document': '8fe56032',\n",
" 'question': 'How can I load the dataset directly into Kaggle Notebooks?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'To download the dataset to Kaggle Notebooks, use the following command:\\n\\n```\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n```\\n\\nMake sure to include the `!` before the `wget` command.',\n",
" 'answer_orig': \"For users of kaggle notebooks, the dataset can be loaded through widget using the below command. Please remember that ! before wget is essential\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nOnce the dataset is loaded to the kaggle notebook server, it can be read through the below pandas command\\ndf = pd.read_csv('housing.csv')\\nHarish Balasundaram\",\n",
" 'document': '8fe56032',\n",
" 'question': 'What command do I need to use to download the dataset to Kaggle Notebooks?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'Yes, there is a specific symbol you need to use before the wget command when using it in Google Colab. You should use the exclamation mark `!` to indicate that you are running a shell command. For example, the command would look like this: `!wget -P /content/drive/My\\\\ Drive/Downloads/ URL`.',\n",
" 'answer_orig': \"For users of kaggle notebooks, the dataset can be loaded through widget using the below command. Please remember that ! before wget is essential\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nOnce the dataset is loaded to the kaggle notebook server, it can be read through the below pandas command\\ndf = pd.read_csv('housing.csv')\\nHarish Balasundaram\",\n",
" 'document': '8fe56032',\n",
" 'question': 'Is there a specific symbol I need to use before the wget command?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"To read the dataset in a Kaggle Notebook after downloading, you can use the following commands:\\n\\n1. Load the dataset using the `wget` command:\\n ```python\\n !wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\n ```\\n\\n2. Once the dataset is loaded, read it using the pandas library:\\n ```python\\n df = pd.read_csv('housing.csv')\\n ``` \\n\\nMake sure to include the `!` before `wget` when using the command in the notebook.\",\n",
" 'answer_orig': \"For users of kaggle notebooks, the dataset can be loaded through widget using the below command. Please remember that ! before wget is essential\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nOnce the dataset is loaded to the kaggle notebook server, it can be read through the below pandas command\\ndf = pd.read_csv('housing.csv')\\nHarish Balasundaram\",\n",
" 'document': '8fe56032',\n",
" 'question': 'After downloading, how do I read the dataset in a Kaggle Notebook?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The instructions for loading the dataset in Kaggle Notebooks were provided by Harish Balasundaram.',\n",
" 'answer_orig': \"For users of kaggle notebooks, the dataset can be loaded through widget using the below command. Please remember that ! before wget is essential\\n!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\nOnce the dataset is loaded to the kaggle notebook server, it can be read through the below pandas command\\ndf = pd.read_csv('housing.csv')\\nHarish Balasundaram\",\n",
" 'document': '8fe56032',\n",
" 'question': 'Who provided the instructions for loading the dataset in Kaggle Notebooks?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can filter a dataset by specific values in a column using the following methods:\\n\\n1. Using logical operators:\\n```python\\ndf = df[(df[\"ocean_proximity\"] == \"<1H OCEAN\") | (df[\"ocean_proximity\"] == \"INLAND\")]\\n```\\nHere, `|` is used for \\'OR\\' conditions.\\n\\n2. Using the `isin` method:\\n```python\\ndf = df[df[\\'ocean_proximity\\'].isin([\\'<1H OCEAN\\', \\'INLAND\\'])]\\n```\\n\\nBoth approaches allow you to filter the dataset based on specific values in the \\'ocean_proximity\\' column.',\n",
" 'answer_orig': 'We can filter a dataset by using its values as below.\\ndf = df[(df[\"ocean_proximity\"] == \"<1H OCEAN\") | (df[\"ocean_proximity\"] == \"INLAND\")]\\nYou can use | for ‘OR’, and & for ‘AND’\\nAlternative:\\ndf = df[df[\\'ocean_proximity\\'].isin([\\'<1H OCEAN\\', \\'INLAND\\'])]\\nRadikal Lukafiardi',\n",
" 'document': 'af833e0a',\n",
" 'question': 'How can I filter a dataset by specific values in a column?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"The symbols used for 'OR' and 'AND' operations when filtering a dataset in Pandas are | for 'OR' and & for 'AND'.\",\n",
" 'answer_orig': 'We can filter a dataset by using its values as below.\\ndf = df[(df[\"ocean_proximity\"] == \"<1H OCEAN\") | (df[\"ocean_proximity\"] == \"INLAND\")]\\nYou can use | for ‘OR’, and & for ‘AND’\\nAlternative:\\ndf = df[df[\\'ocean_proximity\\'].isin([\\'<1H OCEAN\\', \\'INLAND\\'])]\\nRadikal Lukafiardi',\n",
" 'document': 'af833e0a',\n",
" 'question': \"What are the symbols used for 'OR' and 'AND' operations when filtering a dataset?\",\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"Yes, there is an alternative method to filter a dataset by specific column values. You can use the `isin` method as follows:\\n\\n```python\\ndf = df[df['ocean_proximity'].isin(['<1H OCEAN', 'INLAND'])]\\n```\\n\\nThis allows you to filter the dataset for multiple values in the specified column, instead of using `|` for 'OR' conditions.\",\n",
" 'answer_orig': 'We can filter a dataset by using its values as below.\\ndf = df[(df[\"ocean_proximity\"] == \"<1H OCEAN\") | (df[\"ocean_proximity\"] == \"INLAND\")]\\nYou can use | for ‘OR’, and & for ‘AND’\\nAlternative:\\ndf = df[df[\\'ocean_proximity\\'].isin([\\'<1H OCEAN\\', \\'INLAND\\'])]\\nRadikal Lukafiardi',\n",
" 'document': 'af833e0a',\n",
" 'question': 'Is there an alternative method to filter a dataset by specific column values?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can filter a dataset using multiple conditions in Pandas by employing logical operators. For example, to filter a dataset for rows where the \"ocean_proximity\" is either \"<1H OCEAN\" or \"INLAND\", you can use the following syntax:\\n\\n```python\\ndf = df[(df[\"ocean_proximity\"] == \"<1H OCEAN\") | (df[\"ocean_proximity\"] == \"INLAND\")]\\n```\\n\\nAlternatively, you can use the `isin` method for a more concise approach:\\n\\n```python\\ndf = df[df[\\'ocean_proximity\\'].isin([\\'<1H OCEAN\\', \\'INLAND\\'])]\\n```\\n\\nTo combine conditions using logical AND, you would use the `&` operator.',\n",
" 'answer_orig': 'We can filter a dataset by using its values as below.\\ndf = df[(df[\"ocean_proximity\"] == \"<1H OCEAN\") | (df[\"ocean_proximity\"] == \"INLAND\")]\\nYou can use | for ‘OR’, and & for ‘AND’\\nAlternative:\\ndf = df[df[\\'ocean_proximity\\'].isin([\\'<1H OCEAN\\', \\'INLAND\\'])]\\nRadikal Lukafiardi',\n",
" 'document': 'af833e0a',\n",
" 'question': 'Can you provide an example of filtering a dataset using multiple conditions?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'The `isin()` method in the context of filtering a dataset is used to filter rows based on whether the values in a specified column are within a given list of values. For example, you can use it as follows:\\n\\n```python\\ndf = df[df[\\'ocean_proximity\\'].isin([\\'<1H OCEAN\\', \\'INLAND\\'])]\\n```\\n\\nThis will return a subset of the dataset where the values in the `\"ocean_proximity\"` column are either `\"<1H OCEAN\"` or `\"INLAND\"`.',\n",
" 'answer_orig': 'We can filter a dataset by using its values as below.\\ndf = df[(df[\"ocean_proximity\"] == \"<1H OCEAN\") | (df[\"ocean_proximity\"] == \"INLAND\")]\\nYou can use | for ‘OR’, and & for ‘AND’\\nAlternative:\\ndf = df[df[\\'ocean_proximity\\'].isin([\\'<1H OCEAN\\', \\'INLAND\\'])]\\nRadikal Lukafiardi',\n",
" 'document': 'af833e0a',\n",
" 'question': 'What does the isin() method do in the context of filtering a dataset?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'An alternative way to load the data using the requests library is as follows:\\n\\n```python\\nimport requests\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\nif response.status_code == 200:\\n with open(\\'housing.csv\\', \\'wb\\') as file:\\n file.write(response.content)\\nelse:\\n print(\"Download failed.\")\\n```\\n\\nThis approach retrieves the dataset from the specified URL and saves it to a file named `housing.csv`.',\n",
" 'answer_orig': 'Above users showed how to load the dataset directly from github. Here is another useful way of doing this using the `requests` library:\\n# Get data for homework\\nimport requests\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\nif response.status_code == 200:\\nwith open(\\'housing.csv\\', \\'wb\\') as file:\\nfile.write(response.content)\\nelse:\\nprint(\"Download failed.\")\\nTyler Simpson',\n",
" 'document': '8d209d6d',\n",
" 'question': 'What is an alternative way to load the data using the requests library?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can directly download the dataset from GitHub without using pandas by using the `requests` library in Python. Here’s how you can do it:\\n\\n```python\\nimport requests\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\nif response.status_code == 200:\\n with open(\\'housing.csv\\', \\'wb\\') as file:\\n file.write(response.content)\\nelse:\\n print(\"Download failed.\")\\n```\\n\\nThis code sends a GET request to the dataset\\'s URL and saves the content to a file named \\'housing.csv\\' if the download is successful.',\n",
" 'answer_orig': 'Above users showed how to load the dataset directly from github. Here is another useful way of doing this using the `requests` library:\\n# Get data for homework\\nimport requests\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\nif response.status_code == 200:\\nwith open(\\'housing.csv\\', \\'wb\\') as file:\\nfile.write(response.content)\\nelse:\\nprint(\"Download failed.\")\\nTyler Simpson',\n",
" 'document': '8d209d6d',\n",
" 'question': 'How can I directly download the dataset from GitHub without using pandas?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'You can fetch data for homework using the `requests` library with the following method:\\n\\n```python\\nimport requests\\n\\n# Get data for homework\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\n\\nif response.status_code == 200:\\n with open(\\'housing.csv\\', \\'wb\\') as file:\\n file.write(response.content)\\nelse:\\n print(\"Download failed.\")\\n```\\n\\nThis code retrieves the dataset from GitHub and saves it as \\'housing.csv\\' on your local machine.',\n",
" 'answer_orig': 'Above users showed how to load the dataset directly from github. Here is another useful way of doing this using the `requests` library:\\n# Get data for homework\\nimport requests\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\nif response.status_code == 200:\\nwith open(\\'housing.csv\\', \\'wb\\') as file:\\nfile.write(response.content)\\nelse:\\nprint(\"Download failed.\")\\nTyler Simpson',\n",
" 'document': '8d209d6d',\n",
" 'question': 'Could you share a method to fetch data for homework using the requests library?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': \"If the status code is 200 when downloading data with requests, you should open a file in write-binary mode and write the content of the response to that file. Here's how you can do it:\\n\\n```python\\nwith open('housing.csv', 'wb') as file:\\n file.write(response.content)\\n```\\n\\nThis indicates that the download was successful, and you can proceed to save the downloaded data into a file.\",\n",
" 'answer_orig': 'Above users showed how to load the dataset directly from github. Here is another useful way of doing this using the `requests` library:\\n# Get data for homework\\nimport requests\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\nif response.status_code == 200:\\nwith open(\\'housing.csv\\', \\'wb\\') as file:\\nfile.write(response.content)\\nelse:\\nprint(\"Download failed.\")\\nTyler Simpson',\n",
" 'document': '8d209d6d',\n",
" 'question': 'What should I do if the status code is 200 when downloading data with requests?',\n",
" 'course': 'machine-learning-zoomcamp'},\n",
" {'answer_llm': 'If the file download fails when using the requests library, the code will print \"Download failed.\" This occurs when the response status code is not 200.',\n",
" 'answer_orig': 'Above users showed how to load the dataset directly from github. Here is another useful way of doing this using the `requests` library:\\n# Get data for homework\\nimport requests\\nurl = \\'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/housing.csv\\'\\nresponse = requests.get(url)\\nif response.status_code == 200:\\nwith open(\\'housing.csv\\', \\'wb\\') as file:\\nfile.write(response.content)\\nelse:\\nprint(\"Download failed.\")\\nTyler Simpson',\n",
" 'document': '8d209d6d',\n",
" 'question': 'What does the code using the requests library do if the file download fails?',\n",
" 'course': 'machine-learning-zoomcamp'}]"
]
},
"metadata": {},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"source": [
"evaluations = [compute_similarity(embedding_model, record) for record in records]"
],
"metadata": {
"id": "jY2JSuOnBSKU"
},
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"\n",
"f\"Q2) What's the 75% percentile of the score? R2: {np.quantile(evaluations, 0.75)}\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "nWRLquYUBRfA",
"outputId": "eea7ea01-f00d-4d40-c0b1-35257babaf7d"
},
"execution_count": 24,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"\"Q2) What's the 75% percentile of the score? R2: 31.674307823181152\""
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "markdown",
"source": [
"\n",
"## Q3. Computing the cosine\n",
"\n",
"From Q2, we can see that the results are not within the [-1, 1] range expected of a cosine similarity. That's because the vectors coming from this model are not normalized.\n",
"\n",
"So we need to normalize them.\n",
"\n",
"To do that, we:\n",
"\n",
"* Compute the norm of a vector\n",
"* Divide each element by this norm\n",
"\n",
"So, for vector `v`, it'll be `v / ||v||`\n",
"\n",
"In numpy, this is how you do it:\n",
"\n",
"```python\n",
"norm = np.sqrt((v * v).sum())\n",
"v_norm = v / norm\n",
"```\n",
"\n",
"Let's put it into a function and then compute the dot product\n",
"between the normalized vectors. This will give us the cosine similarity.\n",
"\n",
"What's the 75th percentile of the cosine similarity scores?\n",
"\n",
"* 0.63\n",
"* 0.73\n",
"* 0.83\n",
"* 0.93\n"
],
"metadata": {
"id": "liOMLDArB4PM"
}
},
{
"cell_type": "code",
"source": [
"def normalize(v):\n",
" norm = np.sqrt((v * v).sum())\n",
" v_norm = v / norm\n",
" return v_norm\n",
"\n",
"def normalizeV2(v):\n",
" return v/np.linalg.norm(v)\n",
"\n",
"def compute_similarity_normalized(model, record: dict):\n",
" answer_orig = record['answer_orig']\n",
" answer_llm = record['answer_llm']\n",
"\n",
" v_llm = normalizeV2(model.encode(answer_llm))\n",
" v_orig = normalizeV2(model.encode(answer_orig))\n",
"\n",
" return v_llm.dot(v_orig)"
],
"metadata": {
"id": "eBBvu6LsCa3e"
},
"execution_count": 31,
"outputs": []
},
{
"cell_type": "code",
"source": [
"all(normalize(np.array(evaluations)) == normalizeV2(evaluations))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1NfS_zqOBqqB",
"outputId": "208c66d3-2374-4ace-97f2-09d2b8b2aea5"
},
"execution_count": 32,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 32
}
]
},
{
"cell_type": "code",
"source": [
"evaluations_normalized = [compute_similarity_normalized(embedding_model, record) for record in records]\n"
],
"metadata": {
"id": "wnGs2Sn0Dh0J"
},
"execution_count": 33,
"outputs": []
},
{
"cell_type": "code",
"source": [
"f\"Q3) What's the 75% cosine in the scores? R3: {np.quantile(evaluations_normalized, 0.75)}\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "ASUMKtKiCQRR",
"outputId": "9a4133ee-6c33-47c5-dae0-daf30902ba10"
},
"execution_count": 35,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"\"Q3) What's the 75% cosine in the scores? R3: 0.8362347632646561\""
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 35
}
]
},
{
"cell_type": "markdown",
"source": [
"## Q4. Rouge\n",
"\n",
"Now we will explore an alternative metric - the ROUGE score.\n",
"\n",
"This is a set of metrics that compares two answers based on the overlap of n-grams, word sequences, and word pairs.\n",
"\n",
"It can give a more nuanced view of text similarity than cosine similarity alone.\n",
"\n",
"We don't need to implement it ourselves; there's a Python package for it:\n",
"\n",
"```bash\n",
"pip install rouge\n",
"```\n",
"\n",
"(The latest version at the moment of writing is `1.0.1`)\n",
"\n",
"Let's compute the ROUGE score between the answers of the record at index 10 of our dataframe (`doc_id=5170565b`), referred to as `r` below:\n",
"\n",
"```python\n",
"from rouge import Rouge\n",
"rouge_scorer = Rouge()\n",
"\n",
"scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]\n",
"```\n",
"\n",
"There are three scores: `rouge-1`, `rouge-2` and `rouge-l`, and precision, recall and F1 score for each.\n",
"\n",
"* `rouge-1` - the overlap of unigrams,\n",
"* `rouge-2` - bigrams,\n",
"* `rouge-l` - the longest common subsequence\n",
"\n",
"What's the F score for `rouge-1`?\n",
"\n",
"- 0.35\n",
"- 0.45\n",
"- 0.55\n",
"- 0.65"
],
"metadata": {
"id": "r6JHzcq2D1c4"
}
},
{
"cell_type": "code",
"source": [
"!pip install rouge==1.0.1"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Vx2l5T9YD1AB",
"outputId": "23f3aca0-c12c-447c-c71d-f27fe1ebfa69"
},
"execution_count": 37,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting rouge==1.0.1\n",
" Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from rouge==1.0.1) (1.16.0)\n",
"Downloading rouge-1.0.1-py3-none-any.whl (13 kB)\n",
"Installing collected packages: rouge\n",
"Successfully installed rouge-1.0.1\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from rouge import Rouge\n",
"rouge_scorer = Rouge()\n",
"r = list(filter(lambda r: r['document'] == '5170565b', records))[0]\n",
"\n",
"scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]\n",
"scores"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "94dIx0PbDMJ3",
"outputId": "23798f69-e1d6-42a5-8c61-be90c5f5d24a"
},
"execution_count": 40,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'rouge-1': {'r': 0.45454545454545453,\n",
" 'p': 0.45454545454545453,\n",
" 'f': 0.45454544954545456},\n",
" 'rouge-2': {'r': 0.21621621621621623,\n",
" 'p': 0.21621621621621623,\n",
" 'f': 0.21621621121621637},\n",
" 'rouge-l': {'r': 0.3939393939393939,\n",
" 'p': 0.3939393939393939,\n",
" 'f': 0.393939388939394}}"
]
},
"metadata": {},
"execution_count": 40
}
]
},
{
"cell_type": "code",
"source": [
"f\"Q4) What's the F score for `rouge-1`? R4: {scores['rouge-1']['f']}\"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "Og8oPAyFEJSz",
"outputId": "1f3894c9-1576-403a-a7bb-4e23c79e53ef"
},
"execution_count": 41,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"\"Q4) What's the F score for `rouge-1`? R4: 0.45454544954545456\""
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 41
}
]
},
{
"cell_type": "markdown",
"source": [
"## Q5. Average rouge score\n",
"\n",
"Let's compute the average of the F-scores of `rouge-1`, `rouge-2` and `rouge-l` for the same record from Q4.\n",
"\n",
"- 0.35\n",
"- 0.45\n",
"- 0.55\n",
"- 0.65"
],
"metadata": {
"id": "SjUJ-3MjEyKY"
}
},
{
"cell_type": "code",
"source": [
"scores.keys()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "w0YqE1V5FA0Z",
"outputId": "f6a11885-8577-415d-9921-143b37d281b0"
},
"execution_count": 43,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"dict_keys(['rouge-1', 'rouge-2', 'rouge-l'])"
]
},
"metadata": {},
"execution_count": 43
}
]
},
{
"cell_type": "code",
"source": [
"f\"Q5) What's the avg-F score for all rouges? R5: {np.mean([scores[key]['f'] for key in scores.keys()])}\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "BPd4yTuiExxw",
"outputId": "5e7a37ca-4589-4bd0-98ca-dc9b65e0965c"
},
"execution_count": 47,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"\"Q5) What's the avg-F score for all rouges? R5: 0.35490034990035496\""
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 47
}
]
},
{
"cell_type": "markdown",
"source": [
"## Q6. Average rouge score for all the data points\n",
"\n",
"Now let's compute the F-score for all the records and create a dataframe from them.\n",
"\n",
"What's the average F-score in `rouge_2` across all the records?\n",
"\n",
"- 0.10\n",
"- 0.20\n",
"- 0.30\n",
"- 0.40\n"
],
"metadata": {
"id": "JkrV3YoNFXrV"
}
},
{
"cell_type": "code",
"source": [
"rouge_scores = [rouge_scorer.get_scores(record['answer_llm'], record['answer_orig'])[0] for record in records]"
],
"metadata": {
"id": "z8zjMPSIEl-Z"
},
"execution_count": 64,
"outputs": []
},
{
"cell_type": "code",
"source": [
"f\"Q6) What's the average F-score in `rouge_2` across all the records? R6: {np.mean([rs['rouge-2']['f'] for rs in rouge_scores])}\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "UsOYshkVFvmJ",
"outputId": "2d9905ed-e60c-4965-8347-c1d7ab5d1693"
},
"execution_count": 69,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"\"Q6) What's the average F-score in `rouge_2` across all the records? R6: 0.20696501983423318\""
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 69
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "XDGy_BG2FxLL"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment