{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyMohoDhmmKsuh9OLDHor3GB",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"c281b60e104f4c5da547bbdd7208d4bc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "VBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "VBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "VBoxView",
"box_style": "",
"children": [
"IPY_MODEL_2e2fabac70484c1c8b16fa6ca8fd8537",
"IPY_MODEL_bf53c635fa374420ad850eea22cd1e31",
"IPY_MODEL_065d59126a734c1aa096ba40cd4a129f",
"IPY_MODEL_e8855d5678a342f5a33171aa74d3b7bc"
],
"layout": "IPY_MODEL_1c8a6b959f9c4443a92f58eff1b03077"
}
},
"74b084c97f6f46d293a197bf9804460c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9fb5726f91734b1da149784680dc9624",
"placeholder": "",
"style": "IPY_MODEL_202a8eb11eda4e58942113fbeacfdc3d",
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
}
},
"1409574c4f9742e7a711965dd2c8ad87": {
"model_module": "@jupyter-widgets/controls",
"model_name": "PasswordModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "PasswordModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "PasswordView",
"continuous_update": true,
"description": "Token:",
"description_tooltip": null,
"disabled": false,
"layout": "IPY_MODEL_970d4d3daf854f92bd650dc4da99e1bc",
"placeholder": "",
"style": "IPY_MODEL_24b1e007921046b1adc61db0f2bf9fc7",
"value": ""
}
},
"704ecf9409244e0b93612d6a11476346": {
"model_module": "@jupyter-widgets/controls",
"model_name": "CheckboxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "CheckboxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "CheckboxView",
"description": "Add token as git credential?",
"description_tooltip": null,
"disabled": false,
"indent": true,
"layout": "IPY_MODEL_24d3d72f5de54de8a1ded4e528dde332",
"style": "IPY_MODEL_e90cb0ce526a4556bc643ba6c5485661",
"value": true
}
},
"b1a8d3a9a379415393d9e7d995a40788": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Login",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_76e7372656b745c889b9283b76c04148",
"style": "IPY_MODEL_ce0204c7e1ff4a51b2648284a2492262",
"tooltip": ""
}
},
"f928772f92724579b068e984d9eef387": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6dbb8e8a5ebb40a4ba910b09dde27e1a",
"placeholder": "",
"style": "IPY_MODEL_7944af54f2564920822d5d4b348896c4",
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
}
},
"1c8a6b959f9c4443a92f58eff1b03077": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": "center",
"align_self": null,
"border": null,
"bottom": null,
"display": "flex",
"flex": null,
"flex_flow": "column",
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": "50%"
}
},
"9fb5726f91734b1da149784680dc9624": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"202a8eb11eda4e58942113fbeacfdc3d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"970d4d3daf854f92bd650dc4da99e1bc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"24b1e007921046b1adc61db0f2bf9fc7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"24d3d72f5de54de8a1ded4e528dde332": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e90cb0ce526a4556bc643ba6c5485661": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"76e7372656b745c889b9283b76c04148": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ce0204c7e1ff4a51b2648284a2492262": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"6dbb8e8a5ebb40a4ba910b09dde27e1a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7944af54f2564920822d5d4b348896c4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"1b55372f62494ca0baabf87f7e7f4ba8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_bf612001ad354ea19de6ee45a166a43c",
"placeholder": "",
"style": "IPY_MODEL_a8e4691970b14955bfb4865bcef5e912",
"value": "Connecting..."
}
},
"bf612001ad354ea19de6ee45a166a43c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a8e4691970b14955bfb4865bcef5e912": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"2e2fabac70484c1c8b16fa6ca8fd8537": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7eb6de1a979b46f7b234724073f8bc3a",
"placeholder": "",
"style": "IPY_MODEL_6ae4640196da492fadafeb63f4bc89d2",
"value": "Token is valid (permission: write)."
}
},
"bf53c635fa374420ad850eea22cd1e31": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cef83433dbea4f529f43722fe78a8baf",
"placeholder": "",
"style": "IPY_MODEL_845ba8115d5140ac9ee22af4a9e6a03b",
"value": "Your token has been saved in your configured git credential helpers (store)."
}
},
"065d59126a734c1aa096ba40cd4a129f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cdd888041aca4dcf8adc785309071fc6",
"placeholder": "",
"style": "IPY_MODEL_cf63214cb4f8442999fa5b971035fe4f",
"value": "Your token has been saved to /root/.cache/huggingface/token"
}
},
"e8855d5678a342f5a33171aa74d3b7bc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7d9b22f2b7fe4a749f989e247bce446a",
"placeholder": "",
"style": "IPY_MODEL_7f8e268db8144adfb09d089784d8411a",
"value": "Login successful"
}
},
"7eb6de1a979b46f7b234724073f8bc3a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6ae4640196da492fadafeb63f4bc89d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cef83433dbea4f529f43722fe78a8baf": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"845ba8115d5140ac9ee22af4a9e6a03b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cdd888041aca4dcf8adc785309071fc6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cf63214cb4f8442999fa5b971035fe4f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7d9b22f2b7fe4a749f989e247bce446a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7f8e268db8144adfb09d089784d8411a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Quantize_Llama_2_models_using_GGUF_and_llama_cpp.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Quantize Llama 2 models using GGUF and llama.cpp\n",
"> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
"\n",
"❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
"\n",
"## Usage\n",
"\n",
"* `MODEL_ID`: The ID of the model to quantize (e.g., `mlabonne/EvolCodeLlama-7b`).\n",
"* `QUANTIZATION_METHOD`: The quantization method to use.\n",
"\n",
"## Quantization methods\n",
"\n",
"The names of the quantization methods follow the naming convention: \"q\" + the number of bits + the variant used (detailed below). Here is a list of all the possible quant methods and their corresponding use cases, based on model cards made by [TheBloke](https://huggingface.co/TheBloke/):\n",
"\n",
"* `q2_k`: Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.\n",
"* `q3_k_l`: Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n",
"* `q3_k_m`: Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n",
"* `q3_k_s`: Uses Q3_K for all tensors\n",
"* `q4_0`: Original quant method, 4-bit.\n",
"* `q4_1`: Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.\n",
"* `q4_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K\n",
"* `q4_k_s`: Uses Q4_K for all tensors\n",
"* `q5_0`: Higher accuracy, higher resource usage and slower inference.\n",
"* `q5_1`: Even higher accuracy, resource usage and slower inference.\n",
"* `q5_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K\n",
"* `q5_k_s`: Uses Q5_K for all tensors\n",
"* `q6_k`: Uses Q8_K for all tensors\n",
"* `q8_0`: Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.\n",
"\n",
"As a rule of thumb, **I recommend using Q5_K_M** as it preserves most of the model's performance. Alternatively, you can use Q4_K_M if you want to save some memory. In general, K_M versions are better than K_S versions. I cannot recommend Q2_K or Q3_* versions, as they drastically decrease model performance."
],
"metadata": {
"id": "8y_Rk94LzG7I"
}
},
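{
"cell_type": "markdown",
"source": [
"Before running the pipeline below, here is a minimal sketch (not part of the original notebook) that decodes a quantization method name using the \"q\" + number of bits + variant convention described above. The helper name `decode_quant_method` is illustrative, not a llama.cpp function."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Illustrative sketch: split a quant method name into its bit width and variant,\n",
"# following the \"q\" + number of bits + variant naming convention above.\n",
"def decode_quant_method(method):\n",
"    parts = method.lower().split(\"_\")   # e.g. \"q4_k_m\" -> [\"q4\", \"k\", \"m\"]\n",
"    bits = int(parts[0].lstrip(\"q\"))    # number of bits, e.g. 4\n",
"    variant = \"_\".join(parts[1:])       # \"k_m\", \"k_s\", \"0\", \"1\", ...\n",
"    return bits, variant\n",
"\n",
"for m in [\"q2_k\", \"q4_0\", \"q4_k_m\", \"q5_k_m\", \"q8_0\"]:\n",
"    print(m, \"->\", decode_quant_method(m))"
],
"metadata": {},
"execution_count": null,
"outputs": []
},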
{
"cell_type": "code",
"source": [
"# Variables\n",
"MODEL_ID = \"mlabonne/EvolCodeLlama-7b\"\n",
"QUANTIZATION_METHODS = [\"q4_k_m\", \"q5_k_m\"]\n",
"\n",
"# Constants\n",
"MODEL_NAME = MODEL_ID.split('/')[-1]\n",
"\n",
"# Install llama.cpp\n",
"!git clone https://github.com/ggerganov/llama.cpp\n",
"!cd llama.cpp && git pull && make clean && LLAMA_CUBLAS=1 make\n",
"!pip install -r llama.cpp/requirements.txt\n",
"\n",
"# Download model\n",
"!git lfs install\n",
"!git clone https://huggingface.co/{MODEL_ID}\n",
"\n",
"# Convert to fp16\n",
"fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin\"\n",
"!python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}\n",
"\n",
"# Quantize the model for each method in the QUANTIZATION_METHODS list\n",
"for method in QUANTIZATION_METHODS:\n",
" qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n",
" !./llama.cpp/quantize {fp16} {qtype} {method}"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fD24jJxq7t3k",
"outputId": "94954934-0829-44e9-a5e5-262c17e162d0"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"ggml_init_cublas: found 1 CUDA devices:\n",
" Device 0: Tesla T4, compute capability 7.5\n",
"main: build = 1100 (dd0dc36)\n",
"main: quantizing 'EvolCodeLlama-7b/evolcodellama-7b.gguf.fp16.bin' to 'EvolCodeLlama-7b/evolcodellama-7b.gguf.q4_k_s.bin' as Q4_K_S\n",
"llama_model_loader: loaded meta data with 16 key-value pairs and 291 tensors from EvolCodeLlama-7b/evolcodellama-7b.gguf.fp16.bin (version GGUF V1 (support until nov 2023))\n",
"llama_model_loader: - tensor 0: token_embd.weight f16 [ 4096, 32016, 1, 1 ]\n",
"llama_model_loader: - tensor 1: blk.0.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 2: blk.0.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 3: blk.0.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 4: blk.0.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 5: blk.0.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 6: blk.0.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 7: blk.0.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 10: blk.1.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 11: blk.1.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 12: blk.1.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 13: blk.1.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 14: blk.1.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 15: blk.1.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 16: blk.1.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 19: blk.2.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 20: blk.2.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 21: blk.2.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 22: blk.2.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 23: blk.2.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 24: blk.2.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 25: blk.2.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 28: blk.3.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 29: blk.3.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 30: blk.3.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 31: blk.3.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 32: blk.3.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 33: blk.3.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 34: blk.3.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 36: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 37: blk.4.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 38: blk.4.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 39: blk.4.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 40: blk.4.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 41: blk.4.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 42: blk.4.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 43: blk.4.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 46: blk.5.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 47: blk.5.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 48: blk.5.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 49: blk.5.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 50: blk.5.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 51: blk.5.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 52: blk.5.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 55: blk.6.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 56: blk.6.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 57: blk.6.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 58: blk.6.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 59: blk.6.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 60: blk.6.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 61: blk.6.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 64: blk.7.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 65: blk.7.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 66: blk.7.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 67: blk.7.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 68: blk.7.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 69: blk.7.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 70: blk.7.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 71: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 73: blk.8.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 74: blk.8.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 75: blk.8.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 76: blk.8.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 77: blk.8.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 78: blk.8.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 79: blk.8.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 82: blk.9.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 83: blk.9.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 84: blk.9.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 85: blk.9.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 86: blk.9.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 87: blk.9.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 88: blk.9.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 91: blk.10.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 92: blk.10.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 93: blk.10.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 94: blk.10.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 95: blk.10.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 96: blk.10.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 97: blk.10.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 100: blk.11.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 101: blk.11.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 102: blk.11.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 103: blk.11.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 104: blk.11.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 105: blk.11.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 106: blk.11.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 109: blk.12.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 110: blk.12.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 111: blk.12.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 112: blk.12.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 113: blk.12.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 114: blk.12.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 115: blk.12.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 117: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 118: blk.13.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 119: blk.13.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 120: blk.13.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 121: blk.13.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 122: blk.13.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 123: blk.13.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 124: blk.13.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 127: blk.14.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 128: blk.14.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 129: blk.14.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 130: blk.14.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 131: blk.14.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 132: blk.14.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 133: blk.14.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 136: blk.15.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 137: blk.15.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 138: blk.15.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 139: blk.15.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 140: blk.15.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 141: blk.15.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 142: blk.15.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 145: blk.16.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 146: blk.16.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 147: blk.16.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 148: blk.16.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 149: blk.16.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 150: blk.16.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 151: blk.16.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 154: blk.17.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 155: blk.17.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 156: blk.17.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 157: blk.17.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 158: blk.17.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 159: blk.17.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 160: blk.17.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 161: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 163: blk.18.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 164: blk.18.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 165: blk.18.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 166: blk.18.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 167: blk.18.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 168: blk.18.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 169: blk.18.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 172: blk.19.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 173: blk.19.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 174: blk.19.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 175: blk.19.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 176: blk.19.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 177: blk.19.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 178: blk.19.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 181: blk.20.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 182: blk.20.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 183: blk.20.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 184: blk.20.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 185: blk.20.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 186: blk.20.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 187: blk.20.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 190: blk.21.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 191: blk.21.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 192: blk.21.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 193: blk.21.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 194: blk.21.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 195: blk.21.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 196: blk.21.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 197: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 199: blk.22.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 200: blk.22.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 201: blk.22.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 202: blk.22.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 203: blk.22.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 204: blk.22.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 205: blk.22.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 206: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 208: blk.23.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 209: blk.23.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 210: blk.23.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 211: blk.23.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 212: blk.23.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 213: blk.23.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 214: blk.23.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 217: blk.24.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 218: blk.24.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 219: blk.24.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 220: blk.24.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 221: blk.24.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 222: blk.24.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 223: blk.24.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 226: blk.25.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 227: blk.25.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 228: blk.25.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 229: blk.25.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 230: blk.25.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 231: blk.25.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 232: blk.25.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 235: blk.26.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 236: blk.26.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 237: blk.26.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 238: blk.26.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 239: blk.26.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 240: blk.26.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 241: blk.26.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 244: blk.27.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 245: blk.27.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 246: blk.27.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 247: blk.27.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 248: blk.27.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 249: blk.27.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 250: blk.27.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 253: blk.28.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 254: blk.28.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 255: blk.28.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 256: blk.28.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 257: blk.28.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 258: blk.28.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 259: blk.28.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 262: blk.29.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 263: blk.29.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 264: blk.29.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 265: blk.29.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 266: blk.29.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 267: blk.29.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 268: blk.29.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 271: blk.30.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 272: blk.30.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 273: blk.30.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 274: blk.30.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 275: blk.30.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 276: blk.30.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 277: blk.30.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 280: blk.31.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 281: blk.31.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 282: blk.31.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 283: blk.31.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 284: blk.31.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 285: blk.31.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 286: blk.31.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 289: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 290: output.weight f16 [ 4096, 32016, 1, 1 ]\n",
"llama_model_loader: - kv 0: general.architecture str \n",
"llama_model_loader: - kv 1: general.name str \n",
"llama_model_loader: - kv 2: llama.context_length u32 \n",
"llama_model_loader: - kv 3: llama.embedding_length u32 \n",
"llama_model_loader: - kv 4: llama.block_count u32 \n",
"llama_model_loader: - kv 5: llama.feed_forward_length u32 \n",
"llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n",
"llama_model_loader: - kv 7: llama.attention.head_count u32 \n",
"llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n",
"llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n",
"llama_model_loader: - kv 10: llama.rope.freq_base f32 \n",
"llama_model_loader: - kv 11: general.file_type u32 \n",
"llama_model_loader: - kv 12: tokenizer.ggml.model str \n",
"llama_model_loader: - kv 13: tokenizer.ggml.tokens arr \n",
"llama_model_loader: - kv 14: tokenizer.ggml.scores arr \n",
"llama_model_loader: - kv 15: tokenizer.ggml.token_type arr \n",
"llama_model_loader: - type f32: 65 tensors\n",
"llama_model_loader: - type f16: 226 tensors\n",
"llama_model_quantize_internal: meta size = 741408 bytes\n",
"[ 1/ 291] token_embd.weight - [ 4096, 32016, 1, 1], type = f16, quantizing to q4_K .. size = 250.12 MB -> 70.35 MB | hist: \n",
"[ 2/ 291] blk.0.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 3/ 291] blk.0.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 4/ 291] blk.0.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
"[ 5/ 291] blk.0.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 6/ 291] blk.0.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 7/ 291] blk.0.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 8/ 291] blk.0.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
"[ 9/ 291] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 10/ 291] blk.0.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 11/ 291] blk.1.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 12/ 291] blk.1.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 13/ 291] blk.1.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
"[ 14/ 291] blk.1.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 15/ 291] blk.1.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 16/ 291] blk.1.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 17/ 291] blk.1.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
"[ 18/ 291] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 19/ 291] blk.1.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 20/ 291] blk.2.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 21/ 291] blk.2.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 22/ 291] blk.2.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
"[ 23/ 291] blk.2.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 24/ 291] blk.2.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 25/ 291] blk.2.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 26/ 291] blk.2.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
"[ 27/ 291] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 28/ 291] blk.2.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 29/ 291] blk.3.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 30/ 291] blk.3.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 31/ 291] blk.3.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
"[ 32/ 291] blk.3.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 33/ 291] blk.3.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 34/ 291] blk.3.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 35/ 291] blk.3.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
"[ 36/ 291] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 37/ 291] blk.3.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 38/ 291] blk.4.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 39/ 291] blk.4.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 40/ 291] blk.4.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 41/ 291] blk.4.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 42/ 291] blk.4.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 43/ 291] blk.4.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 44/ 291] blk.4.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 45/ 291] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 46/ 291] blk.4.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 47/ 291] blk.5.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 48/ 291] blk.5.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 49/ 291] blk.5.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 50/ 291] blk.5.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 51/ 291] blk.5.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 52/ 291] blk.5.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 53/ 291] blk.5.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 54/ 291] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 55/ 291] blk.5.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 56/ 291] blk.6.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 57/ 291] blk.6.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 58/ 291] blk.6.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 59/ 291] blk.6.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 60/ 291] blk.6.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 61/ 291] blk.6.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 62/ 291] blk.6.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 63/ 291] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 64/ 291] blk.6.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 65/ 291] blk.7.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 66/ 291] blk.7.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 67/ 291] blk.7.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 68/ 291] blk.7.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 69/ 291] blk.7.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 70/ 291] blk.7.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 71/ 291] blk.7.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 72/ 291] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 73/ 291] blk.7.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 74/ 291] blk.8.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 75/ 291] blk.8.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 76/ 291] blk.8.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 77/ 291] blk.8.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 78/ 291] blk.8.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 79/ 291] blk.8.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 80/ 291] blk.8.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 81/ 291] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 82/ 291] blk.8.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 83/ 291] blk.9.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 84/ 291] blk.9.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 85/ 291] blk.9.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 86/ 291] blk.9.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 87/ 291] blk.9.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 88/ 291] blk.9.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 89/ 291] blk.9.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 90/ 291] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 91/ 291] blk.9.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 92/ 291] blk.10.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 93/ 291] blk.10.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 94/ 291] blk.10.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 95/ 291] blk.10.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 96/ 291] blk.10.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 97/ 291] blk.10.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 98/ 291] blk.10.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 99/ 291] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 100/ 291] blk.10.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 101/ 291] blk.11.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 102/ 291] blk.11.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 103/ 291] blk.11.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 104/ 291] blk.11.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 105/ 291] blk.11.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 106/ 291] blk.11.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 107/ 291] blk.11.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 108/ 291] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 109/ 291] blk.11.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 110/ 291] blk.12.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 111/ 291] blk.12.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 112/ 291] blk.12.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 113/ 291] blk.12.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 114/ 291] blk.12.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 115/ 291] blk.12.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 116/ 291] blk.12.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 117/ 291] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 118/ 291] blk.12.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 119/ 291] blk.13.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 120/ 291] blk.13.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 121/ 291] blk.13.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 122/ 291] blk.13.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 123/ 291] blk.13.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 124/ 291] blk.13.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 125/ 291] blk.13.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 126/ 291] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 127/ 291] blk.13.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 128/ 291] blk.14.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 129/ 291] blk.14.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 130/ 291] blk.14.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 131/ 291] blk.14.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 132/ 291] blk.14.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 133/ 291] blk.14.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 134/ 291] blk.14.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 135/ 291] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 136/ 291] blk.14.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 137/ 291] blk.15.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 138/ 291] blk.15.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 139/ 291] blk.15.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 140/ 291] blk.15.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 141/ 291] blk.15.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 142/ 291] blk.15.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 143/ 291] blk.15.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 144/ 291] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 145/ 291] blk.15.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 146/ 291] blk.16.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 147/ 291] blk.16.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 148/ 291] blk.16.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 149/ 291] blk.16.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 150/ 291] blk.16.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 151/ 291] blk.16.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 152/ 291] blk.16.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 153/ 291] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 154/ 291] blk.16.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 155/ 291] blk.17.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 156/ 291] blk.17.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 157/ 291] blk.17.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 158/ 291] blk.17.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 159/ 291] blk.17.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 160/ 291] blk.17.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 161/ 291] blk.17.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 162/ 291] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 163/ 291] blk.17.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 164/ 291] blk.18.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 165/ 291] blk.18.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 166/ 291] blk.18.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 167/ 291] blk.18.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 168/ 291] blk.18.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 169/ 291] blk.18.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 170/ 291] blk.18.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 171/ 291] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 172/ 291] blk.18.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 173/ 291] blk.19.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 174/ 291] blk.19.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 175/ 291] blk.19.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 176/ 291] blk.19.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 177/ 291] blk.19.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 178/ 291] blk.19.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 179/ 291] blk.19.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 180/ 291] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 181/ 291] blk.19.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 182/ 291] blk.20.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 183/ 291] blk.20.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 184/ 291] blk.20.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 185/ 291] blk.20.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 186/ 291] blk.20.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 187/ 291] blk.20.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 188/ 291] blk.20.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 189/ 291] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 190/ 291] blk.20.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 191/ 291] blk.21.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 192/ 291] blk.21.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 193/ 291] blk.21.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 194/ 291] blk.21.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 195/ 291] blk.21.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 196/ 291] blk.21.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 197/ 291] blk.21.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 198/ 291] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 199/ 291] blk.21.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 200/ 291] blk.22.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 201/ 291] blk.22.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 202/ 291] blk.22.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 203/ 291] blk.22.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 204/ 291] blk.22.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 205/ 291] blk.22.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 206/ 291] blk.22.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 207/ 291] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 208/ 291] blk.22.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 209/ 291] blk.23.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 210/ 291] blk.23.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 211/ 291] blk.23.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 212/ 291] blk.23.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 213/ 291] blk.23.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 214/ 291] blk.23.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 215/ 291] blk.23.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 216/ 291] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 217/ 291] blk.23.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 218/ 291] blk.24.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 219/ 291] blk.24.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 220/ 291] blk.24.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 221/ 291] blk.24.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 222/ 291] blk.24.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 223/ 291] blk.24.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 224/ 291] blk.24.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 225/ 291] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 226/ 291] blk.24.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 227/ 291] blk.25.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 228/ 291] blk.25.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 229/ 291] blk.25.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 230/ 291] blk.25.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 231/ 291] blk.25.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 232/ 291] blk.25.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 233/ 291] blk.25.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 234/ 291] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 235/ 291] blk.25.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 236/ 291] blk.26.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 237/ 291] blk.26.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 238/ 291] blk.26.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 239/ 291] blk.26.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 240/ 291] blk.26.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 241/ 291] blk.26.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 242/ 291] blk.26.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 243/ 291] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 244/ 291] blk.26.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 245/ 291] blk.27.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 246/ 291] blk.27.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 247/ 291] blk.27.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 248/ 291] blk.27.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 249/ 291] blk.27.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 250/ 291] blk.27.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 251/ 291] blk.27.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 252/ 291] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 253/ 291] blk.27.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 254/ 291] blk.28.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 255/ 291] blk.28.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 256/ 291] blk.28.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 257/ 291] blk.28.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 258/ 291] blk.28.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 259/ 291] blk.28.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 260/ 291] blk.28.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 261/ 291] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 262/ 291] blk.28.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 263/ 291] blk.29.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 264/ 291] blk.29.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 265/ 291] blk.29.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 266/ 291] blk.29.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 267/ 291] blk.29.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 268/ 291] blk.29.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 269/ 291] blk.29.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 270/ 291] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 271/ 291] blk.29.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 272/ 291] blk.30.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 273/ 291] blk.30.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 274/ 291] blk.30.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 275/ 291] blk.30.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 276/ 291] blk.30.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 277/ 291] blk.30.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 278/ 291] blk.30.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 279/ 291] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 280/ 291] blk.30.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 281/ 291] blk.31.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 282/ 291] blk.31.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 283/ 291] blk.31.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 284/ 291] blk.31.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 285/ 291] blk.31.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 286/ 291] blk.31.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 287/ 291] blk.31.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 288/ 291] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 289/ 291] blk.31.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 290/ 291] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 291/ 291] output.weight - [ 4096, 32016, 1, 1], type = f16, quantizing to q6_K .. size = 250.12 MB -> 102.59 MB | hist: \n",
"llama_model_quantize_internal: model size = 12853.27 MB\n",
"llama_model_quantize_internal: quant size = 3677.45 MB\n",
"\n",
"main: quantize time = 1089230.46 ms\n",
"main: total time = 1089230.46 ms\n"
]
}
]
},
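{
"cell_type": "markdown",
"source": [
"The log above reports the f16 model at 12853.27 MB and the quantized one at 3677.45 MB, roughly a 3.5x size reduction. As a quick sanity check (a minimal sketch; the ~6.74B parameter count for a 7B Llama model is an assumption, not read from the log), you can back out the average bits per weight:\n",
"\n",
"```python\n",
"# Sizes reported by llama.cpp's quantize log (MB)\n",
"f16_mb, quant_mb = 12853.27, 3677.45\n",
"n_params = 6.74e9  # assumed parameter count for a 7B Llama model\n",
"\n",
"ratio = f16_mb / quant_mb\n",
"bits_per_weight = quant_mb * 1024**2 * 8 / n_params\n",
"print(f\"compression: {ratio:.2f}x, ~{bits_per_weight:.2f} bits/weight\")\n",
"# -> compression: 3.50x, ~4.58 bits/weight, consistent with a 4-bit K-quant\n",
"```"
],
"metadata": {}
},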
{
"cell_type": "markdown",
"source": [
"## Run inference\n",
"\n",
"Here is a simple script to run your quantized models. I'm offloading every layer to the GPU (35 for a 7b parameter model) to speed up inference."
],
"metadata": {
"id": "WqI1CPiXI4dP"
}
},
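{
"cell_type": "markdown",
"source": [
"In the command below, `-m` points to the GGUF file, `-n 128` caps the number of generated tokens, `-ngl 35` offloads every layer to the GPU, and `-p` passes the prompt. If you prefer to stay in Python rather than shelling out to `./llama.cpp/main`, here is a minimal sketch using the `llama-cpp-python` bindings (an assumption: they are not installed by this notebook, and the prompt string is only an example):\n",
"\n",
"```python\n",
"from llama_cpp import Llama  # pip install llama-cpp-python (build with CUDA for GPU offloading)\n",
"\n",
"# Path taken from the run below; swap in whichever quantized file you produced\n",
"llm = Llama(model_path=\"EvolCodeLlama-7b/evolcodellama-7b.gguf.q4_k_s.bin\",\n",
"            n_gpu_layers=35)  # same offloading as -ngl 35\n",
"\n",
"# Hypothetical prompt, same token budget as -n 128\n",
"out = llm(\"Write a Python function that merges two sorted lists.\", max_tokens=128)\n",
"print(out[\"choices\"][0][\"text\"])\n",
"```"
],
"metadata": {}
},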
{
"cell_type": "code",
"source": [
"import os\n",
"\n",
"model_list = [file for file in os.listdir(MODEL_NAME) if \"gguf\" in file]\n",
"\n",
"prompt = input(\"Enter your prompt: \")\n",
"chosen_method = input(\"Name of the model (options: \" + \", \".join(model_list) + \"): \")\n",
"\n",
"# Verify the chosen method is in the list\n",
"if chosen_method not in model_list:\n",
" print(\"Invalid name\")\n",
"else:\n",
" qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n",
" !./llama.cpp/main -m {qtype} -n 128 --color -ngl 35 -p \"{prompt}\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vNPL9WYg78l-",
"outputId": "3c3e7d2f-f0de-429d-fd97-dab480bc514a"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Enter your prompt: prompt\n",
"Please specify the quantization method to run the model (options: q4_k_s): q4_k_s\n",
"main: build = 1100 (dd0dc36)\n",
"main: seed = 1693227123\n",
"ggml_init_cublas: found 1 CUDA devices:\n",
" Device 0: Tesla T4, compute capability 7.5\n",
"llama_model_loader: loaded meta data with 17 key-value pairs and 291 tensors from EvolCodeLlama-7b/evolcodellama-7b.gguf.q4_k_s.bin (version GGUF V2 (latest))\n",
"llama_model_loader: - tensor 0: token_embd.weight q4_K [ 4096, 32016, 1, 1 ]\n",
"llama_model_loader: - tensor 1: blk.0.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 2: blk.0.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 3: blk.0.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 4: blk.0.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 5: blk.0.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 6: blk.0.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 7: blk.0.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 10: blk.1.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 11: blk.1.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 12: blk.1.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 13: blk.1.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 14: blk.1.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 15: blk.1.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 16: blk.1.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 19: blk.2.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 20: blk.2.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 21: blk.2.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 22: blk.2.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 23: blk.2.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 24: blk.2.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 25: blk.2.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 28: blk.3.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 29: blk.3.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 30: blk.3.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 31: blk.3.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 32: blk.3.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 33: blk.3.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 34: blk.3.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 36: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 37: blk.4.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 38: blk.4.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 39: blk.4.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 40: blk.4.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 41: blk.4.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 42: blk.4.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 43: blk.4.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 46: blk.5.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 47: blk.5.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 48: blk.5.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 49: blk.5.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 50: blk.5.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 51: blk.5.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 52: blk.5.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 55: blk.6.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 56: blk.6.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 57: blk.6.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 58: blk.6.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 59: blk.6.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 60: blk.6.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 61: blk.6.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 64: blk.7.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 65: blk.7.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 66: blk.7.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 67: blk.7.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 68: blk.7.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 69: blk.7.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 70: blk.7.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 71: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 73: blk.8.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 74: blk.8.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 75: blk.8.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 76: blk.8.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 77: blk.8.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 78: blk.8.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 79: blk.8.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 82: blk.9.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 83: blk.9.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 84: blk.9.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 85: blk.9.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 86: blk.9.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 87: blk.9.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 88: blk.9.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 91: blk.10.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 92: blk.10.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 93: blk.10.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 94: blk.10.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 95: blk.10.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 96: blk.10.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 97: blk.10.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 100: blk.11.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 101: blk.11.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 102: blk.11.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 103: blk.11.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 104: blk.11.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 105: blk.11.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 106: blk.11.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 109: blk.12.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 110: blk.12.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 111: blk.12.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 112: blk.12.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 113: blk.12.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 114: blk.12.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 115: blk.12.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 117: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 118: blk.13.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 119: blk.13.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 120: blk.13.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 121: blk.13.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 122: blk.13.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 123: blk.13.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 124: blk.13.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 127: blk.14.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 128: blk.14.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 129: blk.14.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 130: blk.14.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 131: blk.14.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 132: blk.14.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 133: blk.14.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 136: blk.15.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 137: blk.15.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 138: blk.15.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 139: blk.15.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 141: blk.15.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 142: blk.15.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 145: blk.16.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 146: blk.16.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 147: blk.16.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 148: blk.16.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 149: blk.16.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 150: blk.16.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 151: blk.16.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 154: blk.17.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 155: blk.17.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 156: blk.17.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 157: blk.17.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 158: blk.17.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 159: blk.17.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 160: blk.17.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 161: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 163: blk.18.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 164: blk.18.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 165: blk.18.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 166: blk.18.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 167: blk.18.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 168: blk.18.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 169: blk.18.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 172: blk.19.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 173: blk.19.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 174: blk.19.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 175: blk.19.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 176: blk.19.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 177: blk.19.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 178: blk.19.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 181: blk.20.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 182: blk.20.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 183: blk.20.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 184: blk.20.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 185: blk.20.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 186: blk.20.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 187: blk.20.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 190: blk.21.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 191: blk.21.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 192: blk.21.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 193: blk.21.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 194: blk.21.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 195: blk.21.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 196: blk.21.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 197: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 199: blk.22.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 200: blk.22.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 201: blk.22.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 202: blk.22.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 203: blk.22.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 204: blk.22.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 205: blk.22.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 206: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 208: blk.23.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 209: blk.23.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 210: blk.23.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 211: blk.23.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 212: blk.23.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 213: blk.23.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 214: blk.23.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 217: blk.24.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 218: blk.24.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 219: blk.24.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 220: blk.24.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 221: blk.24.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 222: blk.24.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 223: blk.24.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 226: blk.25.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 227: blk.25.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 228: blk.25.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 229: blk.25.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 230: blk.25.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 231: blk.25.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 232: blk.25.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 235: blk.26.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 236: blk.26.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 237: blk.26.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 238: blk.26.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 239: blk.26.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 240: blk.26.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 241: blk.26.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 244: blk.27.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 245: blk.27.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 246: blk.27.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 247: blk.27.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 248: blk.27.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 249: blk.27.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 250: blk.27.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 253: blk.28.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 254: blk.28.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 255: blk.28.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 256: blk.28.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 257: blk.28.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 258: blk.28.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 259: blk.28.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 262: blk.29.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 263: blk.29.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 264: blk.29.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 265: blk.29.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 266: blk.29.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 267: blk.29.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 268: blk.29.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 271: blk.30.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 272: blk.30.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 273: blk.30.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 274: blk.30.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 275: blk.30.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 276: blk.30.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 277: blk.30.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 280: blk.31.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 281: blk.31.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 282: blk.31.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 283: blk.31.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 284: blk.31.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 285: blk.31.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 286: blk.31.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 289: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 290: output.weight q6_K [ 4096, 32016, 1, 1 ]\n",
"llama_model_loader: - kv 0: general.architecture str \n",
"llama_model_loader: - kv 1: general.name str \n",
"llama_model_loader: - kv 2: llama.context_length u32 \n",
"llama_model_loader: - kv 3: llama.embedding_length u32 \n",
"llama_model_loader: - kv 4: llama.block_count u32 \n",
"llama_model_loader: - kv 5: llama.feed_forward_length u32 \n",
"llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n",
"llama_model_loader: - kv 7: llama.attention.head_count u32 \n",
"llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n",
"llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n",
"llama_model_loader: - kv 10: llama.rope.freq_base f32 \n",
"llama_model_loader: - kv 11: general.file_type u32 \n",
"llama_model_loader: - kv 12: tokenizer.ggml.model str \n",
"llama_model_loader: - kv 13: tokenizer.ggml.tokens arr \n",
"llama_model_loader: - kv 14: tokenizer.ggml.scores arr \n",
"llama_model_loader: - kv 15: tokenizer.ggml.token_type arr \n",
"llama_model_loader: - kv 16: general.quantization_version u32 \n",
"llama_model_loader: - type f32: 65 tensors\n",
"llama_model_loader: - type q4_K: 217 tensors\n",
"llama_model_loader: - type q5_K: 8 tensors\n",
"llama_model_loader: - type q6_K: 1 tensors\n",
"llm_load_print_meta: format = GGUF V2 (latest)\n",
"llm_load_print_meta: arch = llama\n",
"llm_load_print_meta: vocab type = SPM\n",
"llm_load_print_meta: n_vocab = 32016\n",
"llm_load_print_meta: n_merges = 0\n",
"llm_load_print_meta: n_ctx_train = 16384\n",
"llm_load_print_meta: n_ctx = 512\n",
"llm_load_print_meta: n_embd = 4096\n",
"llm_load_print_meta: n_head = 32\n",
"llm_load_print_meta: n_head_kv = 32\n",
"llm_load_print_meta: n_layer = 32\n",
"llm_load_print_meta: n_rot = 128\n",
"llm_load_print_meta: n_gqa = 1\n",
"llm_load_print_meta: f_norm_eps = 1.0e-05\n",
"llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n",
"llm_load_print_meta: n_ff = 11008\n",
"llm_load_print_meta: freq_base = 1000000.0\n",
"llm_load_print_meta: freq_scale = 1\n",
"llm_load_print_meta: model type = 7B\n",
"llm_load_print_meta: model ftype = mostly Q4_K - Small\n",
"llm_load_print_meta: model size = 6.74 B\n",
"llm_load_print_meta: general.name = LLaMA\n",
"llm_load_print_meta: BOS token = 1 '<s>'\n",
"llm_load_print_meta: EOS token = 2 '</s>'\n",
"llm_load_print_meta: UNK token = 0 '<unk>'\n",
"llm_load_print_meta: LF token = 13 '<0x0A>'\n",
"llm_load_tensors: ggml ctx size = 0.09 MB\n",
"llm_load_tensors: using CUDA for GPU acceleration\n",
"llm_load_tensors: mem required = 70.44 MB (+ 256.00 MB per state)\n",
"llm_load_tensors: offloading 32 repeating layers to GPU\n",
"llm_load_tensors: offloading non-repeating layers to GPU\n",
"llm_load_tensors: offloading v cache to GPU\n",
"llm_load_tensors: offloading k cache to GPU\n",
"llm_load_tensors: offloaded 35/35 layers to GPU\n",
"llm_load_tensors: VRAM used: 3864 MB\n",
"..................................................................................................\n",
"llama_new_context_with_model: kv self size = 256.00 MB\n",
"llama_new_context_with_model: compute buffer total size = 71.94 MB\n",
"llama_new_context_with_model: VRAM scratch buffer: 70.53 MB\n",
"\n",
"system_info: n_threads = 2 / 2 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | \n",
"sampling: repeat_last_n = 64, repeat_penalty = 1.100000, presence_penalty = 0.000000, frequency_penalty = 0.000000, top_k = 40, tfs_z = 1.000000, top_p = 0.950000, typical_p = 1.000000, temp = 0.800000, mirostat = 0, mirostat_lr = 0.100000, mirostat_ent = 5.000000\n",
"generate: n_ctx = 512, n_batch = 512, n_predict = 128, n_keep = 0\n",
"\n",
"\n",
"\u001b[33m prompt\u001b[0m.\t\t\t\t\n",
"\t\t\t\t\tif( !this->m_pMiscSettings ) { return; }\t// If no misc settings, do nothing\n",
"\t\t\t\t\t\n",
"\t\t\t\t\t// Get the value of the checkbox for \"Always on top\"\n",
"\t\t\t\t\tbool alwaysOnTop = this->m_pMiscSettings->GetBool(L\"AlwaysOnTop\", false);\n",
"\t\t\t\t\tthis->SetWindowPos((alwaysOnTop ? HWND_TOPMOST : HWND_NOTOPMOST\n",
"llama_print_timings: load time = 1392.10 ms\n",
"llama_print_timings: sample time = 147.99 ms / 128 runs ( 1.16 ms per token, 864.92 tokens per second)\n",
"llama_print_timings: prompt eval time = 261.80 ms / 2 tokens ( 130.90 ms per token, 7.64 tokens per second)\n",
"llama_print_timings: eval time = 5923.18 ms / 127 runs ( 46.64 ms per token, 21.44 tokens per second)\n",
"llama_print_timings: total time = 6370.96 ms\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Push to hub\n",
"\n",
"To push your model to the hub, you'll need to input your Hugging Face token (https://huggingface.co/settings/tokens) in Google Colab's \"Secrets\" tab. The following code creates a new repo with the \"-GGUF\" suffix. Don't forget to change the `username` variable."
],
"metadata": {
"id": "Ar8pO7bb80US"
}
},
{
"cell_type": "code",
"source": [
"!pip install -q huggingface_hub\n",
"from huggingface_hub import create_repo, HfApi\n",
"from google.colab import userdata\n",
"\n",
"# Defined in the secrets tab in Google Colab\n",
"hf_token = userdata.get('huggingface')\n",
"\n",
"api = HfApi()\n",
"username = \"mlabonne\"\n",
"\n",
"# Create empty repo\n",
"create_repo(\n",
" repo_id = f\"{username}/{MODEL_NAME}-GGUF\",\n",
" repo_type=\"model\",\n",
" exist_ok=True,\n",
" token=hf_token\n",
")\n",
"\n",
"# Upload gguf files\n",
"api.upload_folder(\n",
" folder_path=MODEL_NAME,\n",
" repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n",
" allow_patterns=f\"*.gguf\",\n",
" token=hf_token\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 163,
"referenced_widgets": [
"c281b60e104f4c5da547bbdd7208d4bc",
"74b084c97f6f46d293a197bf9804460c",
"1409574c4f9742e7a711965dd2c8ad87",
"704ecf9409244e0b93612d6a11476346",
"b1a8d3a9a379415393d9e7d995a40788",
"f928772f92724579b068e984d9eef387",
"1c8a6b959f9c4443a92f58eff1b03077",
"9fb5726f91734b1da149784680dc9624",
"202a8eb11eda4e58942113fbeacfdc3d",
"970d4d3daf854f92bd650dc4da99e1bc",
"24b1e007921046b1adc61db0f2bf9fc7",
"24d3d72f5de54de8a1ded4e528dde332",
"e90cb0ce526a4556bc643ba6c5485661",
"76e7372656b745c889b9283b76c04148",
"ce0204c7e1ff4a51b2648284a2492262",
"6dbb8e8a5ebb40a4ba910b09dde27e1a",
"7944af54f2564920822d5d4b348896c4",
"1b55372f62494ca0baabf87f7e7f4ba8",
"bf612001ad354ea19de6ee45a166a43c",
"a8e4691970b14955bfb4865bcef5e912",
"2e2fabac70484c1c8b16fa6ca8fd8537",
"bf53c635fa374420ad850eea22cd1e31",
"065d59126a734c1aa096ba40cd4a129f",
"e8855d5678a342f5a33171aa74d3b7bc",
"7eb6de1a979b46f7b234724073f8bc3a",
"6ae4640196da492fadafeb63f4bc89d2",
"cef83433dbea4f529f43722fe78a8baf",
"845ba8115d5140ac9ee22af4a9e6a03b",
"cdd888041aca4dcf8adc785309071fc6",
"cf63214cb4f8442999fa5b971035fe4f",
"7d9b22f2b7fe4a749f989e247bce446a",
"7f8e268db8144adfb09d089784d8411a"
]
},
"id": "UOyKfUD-8jmh",
"outputId": "3c8df47b-f350-4251-a19f-4b9fb1116381"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/268.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.7/268.8 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "c281b60e104f4c5da547bbdd7208d4bc"
}
},
"metadata": {}
}
]
}
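,
{
"cell_type": "markdown",
"source": [
"## Test the uploaded model (optional)\n",
"\n",
"As a quick sanity check, you can download one of the GGUF files back from the Hub and run it with `llama-cpp-python`. The sketch below is a minimal example: the `filename` assumes a `{MODEL_NAME.lower()}.q4_k_s.gguf` file exists in your repo, so adjust it to match one of the files you actually uploaded. Note that the default `llama-cpp-python` wheel runs on CPU; reinstall it with `CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\"` if you want the GPU offloading shown in the logs above."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"!pip install -q llama-cpp-python\n",
"from huggingface_hub import hf_hub_download\n",
"from llama_cpp import Llama\n",
"\n",
"# Download one quantized file back from the repo we just created.\n",
"# The filename is an assumption: change it to match your uploaded files.\n",
"model_path = hf_hub_download(\n",
"    repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n",
"    filename=f\"{MODEL_NAME.lower()}.q4_k_s.gguf\",\n",
"    token=hf_token\n",
")\n",
"\n",
"# Load the GGUF file (n_ctx matches the test run above) and generate a completion\n",
"llm = Llama(model_path=model_path, n_ctx=512)\n",
"output = llm(\"def fibonacci(n):\", max_tokens=64)\n",
"print(output[\"choices\"][0][\"text\"])"
],
"metadata": {},
"execution_count": null,
"outputs": []
}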
]
}