{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4", "authorship_tag": "ABX9TyN5b5wF0ITT+T1IRzUm6Zjj", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "52c4bf7418f74bc79a8c12fe35901974": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c5e609d111b34d408a53a4cd71bb43d5", "IPY_MODEL_0e0a20b5ed7a44e9834022e7eba2194d", "IPY_MODEL_b5627331e78e4eb28765ed20f32cf403" ], "layout": "IPY_MODEL_8084d4cb267f4a52b3d80ec34d291190" } }, "c5e609d111b34d408a53a4cd71bb43d5": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a8dcdf1f7ab64242acb057e8b54ebf79", "placeholder": "​", "style": "IPY_MODEL_1ca492fddbaa4ea7a3226649154e01fd", "value": "Loading checkpoint shards: 100%" } }, "0e0a20b5ed7a44e9834022e7eba2194d": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a8eda8bfe08e4152a80c63830138c96d", "max": 2, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_1f258eacd6d0472385d41523b65dea8b", "value": 2 } }, "b5627331e78e4eb28765ed20f32cf403": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_228b1bcf604f454f8060a250b58008a1", "placeholder": "​", "style": "IPY_MODEL_90b281e9c5ed4e77ab93e5879d0b15a3", "value": " 2/2 [01:13<00:00, 33.04s/it]" } }, "8084d4cb267f4a52b3d80ec34d291190": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a8dcdf1f7ab64242acb057e8b54ebf79": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1ca492fddbaa4ea7a3226649154e01fd": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a8eda8bfe08e4152a80c63830138c96d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1f258eacd6d0472385d41523b65dea8b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "228b1bcf604f454f8060a250b58008a1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "90b281e9c5ed4e77ab93e5879d0b15a3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "051d193cd87f47c1971fb87544e1e615": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9d7247c119e642c5894f15ca6974ef3e", "IPY_MODEL_a79c22bb34ec4f698a00752b47a6f631", "IPY_MODEL_d95f3a3f26c6470d984542cdfd68bec1" ], "layout": "IPY_MODEL_343e11c62a59448eb43bbc0c31bf5f11" } }, "9d7247c119e642c5894f15ca6974ef3e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a153c96bd1fe4c48a41e9b9c7c00dd6e", "placeholder": "​", "style": "IPY_MODEL_84da055d24694320843e13ad37438792", "value": "Loading checkpoint shards: 100%" } }, "a79c22bb34ec4f698a00752b47a6f631": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e375632975904402baea46163e2eeca1", "max": 2, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_95501d0b5a22407288f008bf8cc69726", "value": 2 } }, "d95f3a3f26c6470d984542cdfd68bec1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6aef866a6c474dfabb2140ded933c5aa", "placeholder": "​", "style": "IPY_MODEL_d66fa096d442423c9447cbfbdc1aad8d", "value": " 2/2 [00:59<00:00, 27.43s/it]" } }, "343e11c62a59448eb43bbc0c31bf5f11": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a153c96bd1fe4c48a41e9b9c7c00dd6e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "84da055d24694320843e13ad37438792": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e375632975904402baea46163e2eeca1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "95501d0b5a22407288f008bf8cc69726": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "6aef866a6c474dfabb2140ded933c5aa": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d66fa096d442423c9447cbfbdc1aad8d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c99aff4cfd664ae8a165a27bea0566c8": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e4b64cab6b7b418c8a2575ee26839039", "IPY_MODEL_c3a4fedc73b3480089ef9d13381471ed", "IPY_MODEL_bf722f71c61b4285bcbbf32fd619b3a6" ], "layout": "IPY_MODEL_fd11a6148b704c5b9142c5e8de2d3b25" } }, "e4b64cab6b7b418c8a2575ee26839039": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f0bcdaf940d14ad796fc7ac46c8e1e64", "placeholder": "​", "style": "IPY_MODEL_b6e821c974674f2290c354238d6c919c", "value": "Upload 2 LFS files: 100%" } }, "c3a4fedc73b3480089ef9d13381471ed": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_eeba50e8242c4753bfc0ea48e03f9078", "max": 2, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_7a1f3340688d408092adade75f4baac4", "value": 2 } }, "bf722f71c61b4285bcbbf32fd619b3a6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8c887ca9b0eb44fdb8608bf36b5db5c5", "placeholder": "​", "style": "IPY_MODEL_e4698337e6b843afac706ab657ca6af9", "value": " 2/2 [06:36<00:00, 396.47s/it]" } }, "fd11a6148b704c5b9142c5e8de2d3b25": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f0bcdaf940d14ad796fc7ac46c8e1e64": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b6e821c974674f2290c354238d6c919c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "eeba50e8242c4753bfc0ea48e03f9078": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7a1f3340688d408092adade75f4baac4": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "8c887ca9b0eb44fdb8608bf36b5db5c5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e4698337e6b843afac706ab657ca6af9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1af01f1f1aac42b8bff46fe4df8a59ad": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_eee8731f316244eda5ff0765fd12bf85", "IPY_MODEL_f135278e410f4b708435bb80fb630bcf", "IPY_MODEL_2e6fc79bf5c149d6b0bc5c52e18debc7" ], "layout": "IPY_MODEL_a4b0debc025444a59abd6953b3512c0d" } }, "eee8731f316244eda5ff0765fd12bf85": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_130120644beb48acbc038651459af43c", "placeholder": "​", "style": "IPY_MODEL_bf77e97593a349718bdb5fd9bfd28fe3", "value": "pytorch_model-00001-of-00002.bin: 100%" } }, "f135278e410f4b708435bb80fb630bcf": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f7292741953e47699540ef8712fc0d8d", "max": 9976637886, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9434350b1b9c4060812feb9ecbf63278", "value": 9976637886 } }, "2e6fc79bf5c149d6b0bc5c52e18debc7": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b29647e268414329be56047e522e28b9", "placeholder": "​", "style": "IPY_MODEL_27bb18a199ca47108c7a61e9c443de36", "value": " 9.98G/9.98G [06:35<00:00, 25.8MB/s]" } }, "a4b0debc025444a59abd6953b3512c0d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "130120644beb48acbc038651459af43c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bf77e97593a349718bdb5fd9bfd28fe3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f7292741953e47699540ef8712fc0d8d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9434350b1b9c4060812feb9ecbf63278": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b29647e268414329be56047e522e28b9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "27bb18a199ca47108c7a61e9c443de36": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "33ebb868f3e846f6af1a1a2a8ad6a3cb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_1f73f8b4d4da4e74adc135f2a2f6ee65", "IPY_MODEL_68da6e6e69c8419895bea2068760534e", "IPY_MODEL_6dc1a868e08c4c3b8315116d2c46573b" ], "layout": "IPY_MODEL_7a5d714c17374104bb6f5caaa5541c10" } }, "1f73f8b4d4da4e74adc135f2a2f6ee65": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1b6c59a51359453c926bfcddb3d0f0ea", "placeholder": "​", "style": "IPY_MODEL_dac3669f18284161a58d52f26dffb761", "value": "pytorch_model-00002-of-00002.bin: 100%" } }, "68da6e6e69c8419895bea2068760534e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a3511f489f6d47cc8d404ab6f367b29f", "max": 3500316627, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_20670478612f4b1a8a5f23d71a2609a7", "value": 3500316627 } }, "6dc1a868e08c4c3b8315116d2c46573b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b463153ec04749e38540389efa2981f7", "placeholder": "​", "style": "IPY_MODEL_2bb3d36d248a48fba364f14d9e840306", "value": " 3.50G/3.50G [02:27<00:00, 26.4MB/s]" } }, "7a5d714c17374104bb6f5caaa5541c10": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1b6c59a51359453c926bfcddb3d0f0ea": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dac3669f18284161a58d52f26dffb761": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a3511f489f6d47cc8d404ab6f367b29f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "20670478612f4b1a8a5f23d71a2609a7": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b463153ec04749e38540389efa2981f7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2bb3d36d248a48fba364f14d9e840306": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "source": [ "# Fine-tune Llama 2 in Google Colab\n", "> 🗣️ Large Language Model Course\n", "\n", "❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.\n", "\n", "This notebook runs on a T4 GPU. (Last update: 01 Aug 2023)\n" ], "metadata": { "id": "OSHlAbqzDFDq" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "GLXwJqbjtPho" }, "outputs": [], "source": [ "!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7" ] }, { "cell_type": "code", "source": [ "import os\n", "import torch\n", "from datasets import load_dataset\n", "from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", " BitsAndBytesConfig,\n", " HfArgumentParser,\n", " TrainingArguments,\n", " pipeline,\n", " logging,\n", ")\n", "from peft import LoraConfig, PeftModel\n", "from trl import SFTTrainer" ], "metadata": { "id": "nAMzy_0FtaUZ" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# The model that you want to train from the Hugging Face hub\n", "model_name = \"NousResearch/Llama-2-7b-chat-hf\"\n", "\n", "# The instruction dataset to use\n", "dataset_name = \"mlabonne/guanaco-llama2-1k\"\n", "\n", "# Fine-tuned model name\n", "new_model = \"llama-2-7b-miniguanaco\"\n", "\n", "################################################################################\n", "# QLoRA parameters\n", "################################################################################\n", "\n", "# LoRA attention dimension\n", "lora_r = 64\n", "\n", "# Alpha parameter for LoRA scaling\n", "lora_alpha = 16\n", "\n", "# Dropout probability for LoRA layers\n", "lora_dropout = 0.1\n", "\n", "################################################################################\n", "# bitsandbytes parameters\n", "################################################################################\n", "\n", "# Activate 4-bit precision base model loading\n", "use_4bit = True\n", "\n", "# Compute dtype for 4-bit base models\n", "bnb_4bit_compute_dtype = \"float16\"\n", "\n", "# Quantization type (fp4 or nf4)\n", "bnb_4bit_quant_type = \"nf4\"\n", "\n", "# Activate nested quantization for 4-bit base models (double quantization)\n", "use_nested_quant = False\n", "\n", "################################################################################\n", "# TrainingArguments parameters\n", "################################################################################\n", "\n", "# Output directory where the model predictions and checkpoints will be stored\n", "output_dir = \"./results\"\n", "\n", "# Number of training epochs\n", "num_train_epochs = 1\n", "\n", "# Enable fp16/bf16 training (set bf16 to True with an A100)\n", "fp16 = False\n", "bf16 = False\n", "\n", "# Batch size per GPU for training\n", "per_device_train_batch_size = 4\n", "\n", "# Batch size per GPU for evaluation\n", "per_device_eval_batch_size = 4\n", "\n", "# Number of update steps to accumulate the gradients for\n", "gradient_accumulation_steps = 1\n", "\n", "# Enable gradient checkpointing\n", "gradient_checkpointing = True\n", "\n", "# Maximum gradient normal (gradient clipping)\n", "max_grad_norm = 0.3\n", "\n", "# Initial learning rate (AdamW optimizer)\n", "learning_rate = 2e-4\n", "\n", "# Weight decay to apply to all layers except bias/LayerNorm weights\n", "weight_decay = 0.001\n", "\n", "# Optimizer to use\n", "optim = \"paged_adamw_32bit\"\n", "\n", "# Learning rate schedule\n", "lr_scheduler_type = \"cosine\"\n", "\n", "# Number of training steps (overrides num_train_epochs)\n", "max_steps = -1\n", "\n", "# Ratio of steps for a linear warmup (from 0 to learning rate)\n", "warmup_ratio = 0.03\n", "\n", "# Group sequences into batches with same length\n", "# Saves memory and speeds up training considerably\n", "group_by_length = True\n", "\n", "# Save checkpoint every X updates steps\n", "save_steps = 0\n", "\n", "# Log every X updates steps\n", "logging_steps = 25\n", "\n", "################################################################################\n", "# SFT parameters\n", "################################################################################\n", "\n", "# Maximum sequence length to use\n", "max_seq_length = None\n", "\n", "# Pack multiple short examples in the same input sequence to increase efficiency\n", "packing = False\n", "\n", "# Load the entire model on the GPU 0\n", "device_map = {\"\": 0}" ], "metadata": { "id": "ib_We3NLtj2E" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Load dataset (you can process it here)\n", "dataset = load_dataset(dataset_name, split=\"train\")\n", "\n", "# Load tokenizer and model with QLoRA configuration\n", "compute_dtype = getattr(torch, bnb_4bit_compute_dtype)\n", "\n", "bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=use_4bit,\n", " bnb_4bit_quant_type=bnb_4bit_quant_type,\n", " bnb_4bit_compute_dtype=compute_dtype,\n", " bnb_4bit_use_double_quant=use_nested_quant,\n", ")\n", "\n", "# Check GPU compatibility with bfloat16\n", "if compute_dtype == torch.float16 and use_4bit:\n", " major, _ = torch.cuda.get_device_capability()\n", " if major >= 8:\n", " print(\"=\" * 80)\n", " print(\"Your GPU supports bfloat16: accelerate training with bf16=True\")\n", " print(\"=\" * 80)\n", "\n", "# Load base model\n", "model = AutoModelForCausalLM.from_pretrained(\n", " model_name,\n", " quantization_config=bnb_config,\n", " device_map=device_map\n", ")\n", "model.config.use_cache = False\n", "model.config.pretraining_tp = 1\n", "\n", "# Load LLaMA tokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", "tokenizer.pad_token = tokenizer.eos_token\n", "tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n", "\n", "# Load LoRA configuration\n", "peft_config = LoraConfig(\n", " lora_alpha=lora_alpha,\n", " lora_dropout=lora_dropout,\n", " r=lora_r,\n", " bias=\"none\",\n", " task_type=\"CAUSAL_LM\",\n", ")\n", "\n", "# Set training parameters\n", "training_arguments = TrainingArguments(\n", " output_dir=output_dir,\n", " num_train_epochs=num_train_epochs,\n", " per_device_train_batch_size=per_device_train_batch_size,\n", " gradient_accumulation_steps=gradient_accumulation_steps,\n", " optim=optim,\n", " save_steps=save_steps,\n", " logging_steps=logging_steps,\n", " learning_rate=learning_rate,\n", " weight_decay=weight_decay,\n", " fp16=fp16,\n", " bf16=bf16,\n", " max_grad_norm=max_grad_norm,\n", " max_steps=max_steps,\n", " warmup_ratio=warmup_ratio,\n", " group_by_length=group_by_length,\n", " lr_scheduler_type=lr_scheduler_type,\n", " report_to=\"tensorboard\"\n", ")\n", "\n", "# Set supervised fine-tuning parameters\n", "trainer = SFTTrainer(\n", " model=model,\n", " train_dataset=dataset,\n", " peft_config=peft_config,\n", " dataset_text_field=\"text\",\n", " max_seq_length=max_seq_length,\n", " tokenizer=tokenizer,\n", " args=training_arguments,\n", " packing=packing,\n", ")\n", "\n", "# Train model\n", "trainer.train()\n", "\n", "# Save trained model\n", "trainer.model.save_pretrained(new_model)" ], "metadata": { "id": "OJXpOgBFuSrc", "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "52c4bf7418f74bc79a8c12fe35901974", "c5e609d111b34d408a53a4cd71bb43d5", "0e0a20b5ed7a44e9834022e7eba2194d", "b5627331e78e4eb28765ed20f32cf403", "8084d4cb267f4a52b3d80ec34d291190", "a8dcdf1f7ab64242acb057e8b54ebf79", "1ca492fddbaa4ea7a3226649154e01fd", "a8eda8bfe08e4152a80c63830138c96d", "1f258eacd6d0472385d41523b65dea8b", "228b1bcf604f454f8060a250b58008a1", "90b281e9c5ed4e77ab93e5879d0b15a3" ] }, "outputId": "8d06ed40-ea32-4d85-8665-413bde069607" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00" ], "text/html": [ "\n", "
\n", " \n", " \n", " [250/250 24:05, Epoch 1/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
11.350100
22.015800
31.048700
41.287700
51.451200
61.659900
71.472300
81.326700
91.140000
101.395300
111.776400
121.169100
131.434700
141.550400
151.440400
161.352100
171.062800
181.173400
191.385300
201.433300
211.787800
221.600200
231.067800
241.679300
251.209900
261.305200
271.465300
281.781800
291.152400
301.434400
311.399300
321.796300
331.674500
341.567600
351.830000
361.720200
371.335800
381.333000
392.044900
401.832200
411.533900
421.259900
431.372300
441.551600
452.002400
461.956100
472.441900
482.289100
491.544500
502.040300
511.103800
521.630800
531.437900
541.820900
551.080300
561.029200
570.999400
580.795900
591.331600
601.099500
611.199000
621.146000
631.129000
641.109500
651.207000
661.360600
671.879000
681.317200
691.033300
701.153400
711.112400
721.218400
731.134600
741.053200
751.008900
761.077000
771.245000
781.395900
791.488800
801.382500
811.442200
821.028500
831.208500
841.780200
851.679300
861.276600
871.374600
881.490000
891.567100
901.435000
911.329800
921.387600
930.971400
941.293800
951.585900
961.431700
971.948900
981.630500
991.839100
1001.740900
1010.717200
1020.958100
1031.625900
1041.150000
1050.999200
1061.253100
1071.007600
1081.049700
1091.265900
1101.251300
1111.109500
1121.652500
1131.238000
1141.521300
1151.002400
1160.982400
1171.389300
1181.114900
1191.093900
1201.254200
1211.132300
1220.925300
1231.292700
1241.317600
1251.080400
1260.918800
1271.203400
1281.098800
1291.360800
1301.256900
1311.392600
1321.167600
1331.134900
1341.423700
1351.111200
1361.081600
1371.806000
1381.238800
1391.306800
1401.421900
1411.467300
1421.245100
1431.594200
1441.426000
1451.393800
1461.894400
1471.331200
1481.519400
1491.926300
1501.293200
1511.135100
1521.066700
1530.856900
1541.021500
1550.808800
1560.936300
1570.979700
1581.100200
1591.091400
1600.918800
1611.370800
1621.380300
1630.965300
1641.142400
1651.436400
1660.970400
1670.872600
1681.662500
1691.623500
1701.481700
1710.822300
1721.605500
1731.769800
1741.320100
1750.969300
1760.798700
1771.233200
1781.168500
1791.251400
1801.221500
1811.491100
1821.010200
1831.375500
1841.722900
1851.179300
1861.474400
1871.968200
1881.297200
1891.564500
1901.480700
1911.464700
1921.901400
1931.620100
1941.509000
1951.587000
1961.510000
1971.773900
1981.473200
1991.660400
2001.832600
2011.021400
2021.120400
2031.030200
2041.167500
2050.853200
2060.927000
2071.157400
2081.071600
2091.195400
2101.155800
2111.502300
2121.091600
2131.225200
2141.148900
2151.238200
2161.600200
2171.203600
2181.266200
2190.970900
2201.451000
2211.281300
2220.952500
2231.313800
2240.915700
2251.040000
2261.493800
2271.186400
2281.278700
2291.061100
2301.209000
2310.881400
2321.659300
2331.135200
2341.497800
2351.557500
2360.849200
2371.329200
2381.147700
2391.764600
2401.740000
2412.043700
2421.675000
2431.809600
2441.721400
2452.343300
2461.830400
2471.754400
2481.741900
2492.011000
2501.741700

" ] }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "# %load_ext tensorboard\n", "# %tensorboard --logdir results/runs" ], "metadata": { "id": "crj9svNe4hU5" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Ignore warnings\n", "logging.set_verbosity(logging.CRITICAL)\n", "\n", "# Run text generation pipeline with our next model\n", "prompt = \"What is a large language model?\"\n", "pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n", "result = pipe(f\"[INST] {prompt} [/INST]\")\n", "print(result[0]['generated_text'])" ], "metadata": { "id": "frlSLPin4IJ4", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "e5bf6b3a-f20e-49f7-e0b7-36f71ca207c1" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1270: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )\n", " warnings.warn(\n", "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n", " warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "[INST] What is a large language model? [/INST] A large language model is a type of artificial intelligence (AI) model that is trained on a large dataset of text to generate human-like language outputs. It is designed to be able to understand and generate text in a way that is similar to human language, and can be used for a wide range of applications such as chatbots, language translation, and text summarization.\n", "\n", "Large language models are typically trained using deep learning techniques, such as recurrent neural networks (RNNs) or transformer models, and are often based on pre-trained models such as BERT or RoBERTa. These models are trained on large datasets of text, such as books, articles, or websites, and are designed to learn the patterns and structures of language.\n", "\n", "Some examples of large language models include:\n", "\n", "* BERT (Bidirectional Encoder Representations from Transformers\n" ] } ] }, { "cell_type": "code", "source": [ "# Empty VRAM\n", "del model\n", "del pipe\n", "del trainer\n", "import gc\n", "gc.collect()\n", "gc.collect()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mkQCviG0Zta-", "outputId": "e7c4ab10-4039-4490-b7f0-6ea118bdd709" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "19965" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "code", "source": [ "# Reload model in FP16 and merge it with LoRA weights\n", "base_model = AutoModelForCausalLM.from_pretrained(\n", " model_name,\n", " low_cpu_mem_usage=True,\n", " return_dict=True,\n", " torch_dtype=torch.float16,\n", " device_map=device_map,\n", ")\n", "model = PeftModel.from_pretrained(base_model, new_model)\n", "model = model.merge_and_unload()\n", "\n", "# Reload tokenizer to save it\n", "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", "tokenizer.pad_token = tokenizer.eos_token\n", "tokenizer.padding_side = \"right\"" ], "metadata": { "id": "QQn30cRtAZ-P", "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "051d193cd87f47c1971fb87544e1e615", "9d7247c119e642c5894f15ca6974ef3e", "a79c22bb34ec4f698a00752b47a6f631", "d95f3a3f26c6470d984542cdfd68bec1", "343e11c62a59448eb43bbc0c31bf5f11", "a153c96bd1fe4c48a41e9b9c7c00dd6e", "84da055d24694320843e13ad37438792", "e375632975904402baea46163e2eeca1", "95501d0b5a22407288f008bf8cc69726", "6aef866a6c474dfabb2140ded933c5aa", "d66fa096d442423c9447cbfbdc1aad8d" ] }, "outputId": "1c5ef3c4-d107-4c43-9bd6-0ca72903db0e" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00