diff --git a/Fine_tune_LLMs_with_Axolotl.ipynb b/Fine_tune_LLMs_with_Axolotl.ipynb new file mode 100644 index 0000000..edc6e0c --- /dev/null +++ b/Fine_tune_LLMs_with_Axolotl.ipynb @@ -0,0 +1,1555 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Fine_tune_LLMs_with_Axolotl.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Fine-tune LLMs with Axolotl\n", + "\n", + "> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n", + "\n", + "❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on [Giorgio](https://github.com/g-i-o-r-g-i-o)'s notebook and Axolotl's [example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/main/examples/colab-notebooks/colab-axolotl-example.ipynb)." + ], + "metadata": { + "id": "mL-BPZyZ0gtV" + } + }, + { + "cell_type": "code", + "source": [ + "!git clone -q https://github.com/OpenAccess-AI-Collective/axolotl\n", + "%cd axolotl\n", + "!pip install -qqq packaging huggingface_hub --progress-bar off\n", + "!pip install -qqq -e '.[flash-attn,deepspeed]' --progress-bar off" + ], + "metadata": { + "id": "BI6B0Bfe0hxr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import yaml\n", + "\n", + "new_model = \"mlabonne/TinyAlpaca\"\n", + "yaml_string = \"\"\"\n", + "base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\n", + "model_type: LlamaForCausalLM\n", + "tokenizer_type: LlamaTokenizer\n", + "is_llama_derived_model: true\n", + "\n", + "load_in_8bit: false\n", + "load_in_4bit: true\n", + "strict: false\n", + "\n", + "datasets:\n", + " - path: mhenrichsen/alpaca_2k_test\n", + " type: alpaca\n", + "dataset_prepared_path:\n", + "val_set_size: 0.05\n", + "output_dir: ./qlora-out\n", + "\n", + "adapter: qlora\n", + "lora_model_dir:\n", + "\n", + "sequence_len: 1096\n", + "sample_packing: true\n", + "pad_to_sequence_len: true\n", + "\n", + "lora_r: 32\n", + "lora_alpha: 16\n", + "lora_dropout: 0.05\n", + "lora_target_modules:\n", + "lora_target_linear: true\n", + "lora_fan_in_fan_out:\n", + "\n", + "wandb_project:\n", + "wandb_entity:\n", + "wandb_watch:\n", + "wandb_name:\n", + "wandb_log_model:\n", + "\n", + "mlflow_experiment_name: colab-example\n", + "\n", + "gradient_accumulation_steps: 1\n", + "micro_batch_size: 1\n", + "num_epochs: 4\n", + "max_steps: 20\n", + "optimizer: paged_adamw_32bit\n", + "lr_scheduler: cosine\n", + "learning_rate: 0.0002\n", + "\n", + "train_on_inputs: false\n", + "group_by_length: false\n", + "bf16: false\n", + "fp16: true\n", + "tf32: false\n", + "\n", + "gradient_checkpointing: true\n", + "early_stopping_patience:\n", + "resume_from_checkpoint:\n", + "local_rank:\n", + "logging_steps: 1\n", + "xformers_attention:\n", + "flash_attention: false\n", + "\n", + "warmup_steps: 10\n", + "evals_per_epoch:\n", + "saves_per_epoch:\n", + "debug:\n", + "deepspeed:\n", + "weight_decay: 0.0\n", + "fsdp:\n", + "fsdp_config:\n", + "special_tokens:\n", + "\n", + "\"\"\"\n", + "\n", + "# Convert the YAML string to a Python dictionary\n", + "yaml_dict = yaml.safe_load(yaml_string)\n", + "\n", + "# Specify your file path\n", + "yaml_file = 'config.yaml'\n", + "\n", + "# Write the YAML file\n", + "with open(yaml_file, 'w') as file:\n", + " yaml.dump(yaml_dict, file)" + ], + "metadata": { + "id": "70zJf1hi0huQ" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"id": "ffcc0IB9Sr-z", + "outputId": "f64291b0-0867-440b-b7b9-683fa66e3a08" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The following values were not passed to `accelerate launch` and had defaults used instead:\n", + "\t`--num_processes` was set to a value of `1`\n", + "\t`--num_machines` was set to a value of `1`\n", + "\t`--mixed_precision` was set to a value of `'no'`\n", + "\t`--dynamo_backend` was set to a value of `'no'`\n", + "To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n", + "2024-01-27 22:05:58.426793: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-01-27 22:05:58.426851: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-01-27 22:05:58.428152: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-01-27 22:05:59.454203: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "[2024-01-27 22:06:00,824] [INFO] [datasets.:58] [PID:2797] PyTorch version 2.1.2 available.\n", + "[2024-01-27 22:06:00,825] [INFO] [datasets.:95] [PID:2797] TensorFlow version 2.15.0 available.\n", + "[2024-01-27 22:06:00,827] [INFO] [datasets.:108] [PID:2797] JAX version 0.4.23 available.\n", + "[2024-01-27 22:06:01,844] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "config.json: 100% 560/560 [00:00<00:00, 3.84MB/s]\n", + "[2024-01-27 22:06:03,638] [INFO] [axolotl.normalize_config:170] [PID:2797] [RANK:0] GPU memory usage baseline: 0.000GB (+0.255GB misc)\u001b[39m\n", + " dP dP dP \n", + " 88 88 88 \n", + " .d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88 \n", + " 88' `88 `8bd8' 88' `88 88 88' `88 88 88 \n", + " 88. .88 .d88b. 88. .88 88 88. .88 88 88 \n", + " `88888P8 dP' `dP `88888P' dP `88888P' dP dP \n", + " \n", + " \n", + "\n", + "\u001b[33m[2024-01-27 22:06:03,642] [WARNING] [axolotl.scripts.check_user_token:382] [PID:2797] [RANK:0] Error verifying HuggingFace token. Remember to log in using `huggingface-cli login` and get your access token from https://huggingface.co/settings/tokens if you want to use gated models or datasets.\u001b[39m\n", + "tokenizer_config.json: 100% 776/776 [00:00<00:00, 4.70MB/s]\n", + "tokenizer.model: 100% 500k/500k [00:00<00:00, 1.77MB/s]\n", + "special_tokens_map.json: 100% 414/414 [00:00<00:00, 1.73MB/s]\n", + "tokenizer.json: 100% 1.84M/1.84M [00:00<00:00, 4.52MB/s]\n", + "[2024-01-27 22:06:05,850] [DEBUG] [axolotl.load_tokenizer:216] [PID:2797] [RANK:0] EOS: 2 / \u001b[39m\n", + "[2024-01-27 22:06:05,850] [DEBUG] [axolotl.load_tokenizer:217] [PID:2797] [RANK:0] BOS: 1 / \u001b[39m\n", + "[2024-01-27 22:06:05,850] [DEBUG] [axolotl.load_tokenizer:218] [PID:2797] [RANK:0] PAD: 2 / \u001b[39m\n", + "[2024-01-27 22:06:05,850] [DEBUG] [axolotl.load_tokenizer:219] [PID:2797] [RANK:0] UNK: 0 / \u001b[39m\n", + "[2024-01-27 22:06:05,850] [INFO] [axolotl.load_tokenizer:230] [PID:2797] [RANK:0] No Chat template selected. 
Consider adding a chat template for easier inference.\u001b[39m\n", + "[2024-01-27 22:06:05,850] [INFO] [axolotl.load_tokenized_prepared_datasets:182] [PID:2797] [RANK:0] Unable to find prepared dataset in last_run_prepared/c0112363192ff19da1e486577d4bf28b\u001b[39m\n", + "[2024-01-27 22:06:05,850] [INFO] [axolotl.load_tokenized_prepared_datasets:183] [PID:2797] [RANK:0] Loading raw datasets...\u001b[39m\n", + "\u001b[33m[2024-01-27 22:06:05,850] [WARNING] [axolotl.load_tokenized_prepared_datasets:185] [PID:2797] [RANK:0] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset.\u001b[39m\n", + "[2024-01-27 22:06:05,850] [INFO] [axolotl.load_tokenized_prepared_datasets:192] [PID:2797] [RANK:0] No seed provided, using default seed of 42\u001b[39m\n", + "Downloading readme: 100% 28.0/28.0 [00:00<00:00, 219kB/s]\n", + "Downloading data: 100% 1.76M/1.76M [00:00<00:00, 2.45MB/s]\n", + "Generating train split: 2000 examples [00:00, 37778.35 examples/s]\n", + "Tokenizing Prompts (num_proc=2): 100% 2000/2000 [00:05<00:00, 352.09 examples/s]\n", + "[2024-01-27 22:06:17,586] [INFO] [axolotl.load_tokenized_prepared_datasets:395] [PID:2797] [RANK:0] merging datasets\u001b[39m\n", + "Dropping Long Sequences (num_proc=2): 100% 2000/2000 [00:00<00:00, 2927.43 examples/s]\n", + "Add position_id column (Sample Packing) (num_proc=2): 100% 2000/2000 [00:00<00:00, 3089.00 examples/s]\n", + "[2024-01-27 22:06:19,041] [INFO] [axolotl.load_tokenized_prepared_datasets:405] [PID:2797] [RANK:0] Saving merged prepared dataset to disk... last_run_prepared/c0112363192ff19da1e486577d4bf28b\u001b[39m\n", + "Saving the dataset (1/1 shards): 100% 2000/2000 [00:00<00:00, 142755.66 examples/s]\n", + "[2024-01-27 22:06:19,067] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] total_num_tokens: 22777\u001b[39m\n", + "[2024-01-27 22:06:19,069] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] `total_supervised_tokens: 16719`\u001b[39m\n", + "[2024-01-27 22:06:24,965] [INFO] [axolotl.utils.samplers.multipack._len_est:178] [PID:2797] [RANK:0] packing_efficiency_estimate: 1.0 total_num_tokens per device: 22777\u001b[39m\n", + "[2024-01-27 22:06:24,965] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] data_loader_len: 19\u001b[39m\n", + "[2024-01-27 22:06:24,966] [INFO] [axolotl.log:61] [PID:2797] [RANK:0] sample_packing_eff_est across ranks: [0.8659139294403893]\u001b[39m\n", + "[2024-01-27 22:06:24,966] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] sample_packing_eff_est: None\u001b[39m\n", + "[2024-01-27 22:06:24,966] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] total_num_steps: 76\u001b[39m\n", + "[2024-01-27 22:06:24,970] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] total_num_tokens: 414041\u001b[39m\n", + "[2024-01-27 22:06:25,001] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] `total_supervised_tokens: 294246`\u001b[39m\n", + "[2024-01-27 22:06:25,009] [INFO] [axolotl.utils.samplers.multipack._len_est:178] [PID:2797] [RANK:0] packing_efficiency_estimate: 1.0 total_num_tokens per device: 414041\u001b[39m\n", + "[2024-01-27 22:06:25,009] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] data_loader_len: 372\u001b[39m\n", + "[2024-01-27 22:06:25,009] [INFO] [axolotl.log:61] [PID:2797] [RANK:0] sample_packing_eff_est across ranks: [0.8624991667499917]\u001b[39m\n", + "[2024-01-27 22:06:25,009] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] sample_packing_eff_est: 0.87\u001b[39m\n", + "[2024-01-27 22:06:25,009] [DEBUG] [axolotl.log:61] [PID:2797] [RANK:0] total_num_steps: 1488\u001b[39m\n", + "[2024-01-27 
22:06:25,009] [INFO] [axolotl.prepare_dataset:115] [PID:2797] [RANK:0] Maximum number of steps set at 20\u001b[39m\n", + "[2024-01-27 22:06:25,010] [DEBUG] [axolotl.train.log:61] [PID:2797] [RANK:0] loading tokenizer... TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\u001b[39m\n", + "[2024-01-27 22:06:25,574] [DEBUG] [axolotl.load_tokenizer:216] [PID:2797] [RANK:0] EOS: 2 / </s>\u001b[39m\n", + "[2024-01-27 22:06:25,575] [DEBUG] [axolotl.load_tokenizer:217] [PID:2797] [RANK:0] BOS: 1 / <s>\u001b[39m\n", + "[2024-01-27 22:06:25,575] [DEBUG] [axolotl.load_tokenizer:218] [PID:2797] [RANK:0] PAD: 2 / </s>\u001b[39m\n", + "[2024-01-27 22:06:25,575] [DEBUG] [axolotl.load_tokenizer:219] [PID:2797] [RANK:0] UNK: 0 / <unk>\u001b[39m\n", + "[2024-01-27 22:06:25,575] [INFO] [axolotl.load_tokenizer:230] [PID:2797] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.\u001b[39m\n", + "[2024-01-27 22:06:25,575] [DEBUG] [axolotl.train.log:61] [PID:2797] [RANK:0] loading model and peft_config...\u001b[39m\n", + "[2024-01-27 22:06:25,678] [INFO] [axolotl.load_model:372] [PID:2797] [RANK:0] patching _expand_mask\u001b[39m\n", + "model.safetensors: 100% 4.40G/4.40G [01:42<00:00, 42.8MB/s]\n", + "generation_config.json: 100% 129/129 [00:00<00:00, 758kB/s]\n", + "[2024-01-27 22:08:13,799] [INFO] [axolotl.load_model:641] [PID:2797] [RANK:0] GPU memory usage after model load: 0.753GB (+0.022GB cache, +0.368GB misc)\u001b[39m\n", + "[2024-01-27 22:08:13,825] [INFO] [axolotl.load_model:673] [PID:2797] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training\u001b[39m\n", + "[2024-01-27 22:08:13,829] [INFO] [axolotl.load_model:685] [PID:2797] [RANK:0] converting modules to torch.float16 for flash attention\u001b[39m\n", + "[2024-01-27 22:08:13,832] [INFO] [axolotl.load_lora:797] [PID:2797] [RANK:0] found linear modules: ['v_proj', 'k_proj', 'gate_proj', 'up_proj', 'o_proj', 'q_proj', 'down_proj']\u001b[39m\n", + "trainable params: 25,231,360 || all params: 1,125,279,744 || trainable%: 2.2422299996542017\n", + "[2024-01-27 22:08:14,263] [INFO] [axolotl.load_model:722] [PID:2797] [RANK:0] GPU memory usage after adapters: 0.847GB (+0.514GB cache, +0.368GB misc)\u001b[39m\n", + "[2024-01-27 22:08:14,273] [INFO] [axolotl.train.log:61] [PID:2797] [RANK:0] Pre-saving adapter config to ./qlora-out\u001b[39m\n", + "[2024-01-27 22:08:14,280] [INFO] [axolotl.train.log:61] [PID:2797] [RANK:0] Starting trainer...\u001b[39m\n", + "[2024-01-27 22:08:14,686] [INFO] [axolotl.utils.samplers.multipack._len_est:178] [PID:2797] [RANK:0] packing_efficiency_estimate: 0.87 total_num_tokens per device: 414041\u001b[39m\n", + "[2024-01-27 22:08:14,688] [INFO] [axolotl.utils.samplers.multipack._len_est:178] [PID:2797] [RANK:0] packing_efficiency_estimate: 0.87 total_num_tokens per device: 414041\u001b[39m\n", + "[2024-01-27 22:08:14,850] [INFO] [axolotl.callbacks.on_train_begin:601] [PID:2797] [RANK:0] The Axolotl config has been saved to the MLflow artifacts.\u001b[39m\n", + " 0% 0/20 [00:00<?, ?it/s] [...] special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={ \t0: AddedToken(\"<unk>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), \t1: AddedToken(\"<s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), \t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), })\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!accelerate launch -m axolotl.cli.train config.yaml", + 
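"\n", + "# Tip: the VRAM warning in the logs above comes from tokenizing the dataset during training.\n", + "# Axolotl can pre-process it beforehand (run this before the training command; see Axolotl's docs):\n", + "# !python -m axolotl.cli.preprocess config.yaml" + 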
] + }, + { + "cell_type": "code", + "source": [ + "!python3 -m axolotl.cli.merge_lora config.yaml --lora_model_dir=\"./qlora-out\"" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5GENnc9Z8v-P", + "outputId": "762e6290-9877-485c-c084-5370c3b3d7f3" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "mkdir: cannot create directory ‘final_model’: File exists\n", + "2024-01-27 22:10:36.876247: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-01-27 22:10:36.876317: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-01-27 22:10:36.878122: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-01-27 22:10:38.390922: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "[2024-01-27 22:10:39,447] [INFO] [datasets.<module>:58] [PID:4079] PyTorch version 2.1.2 available.\n", + "[2024-01-27 22:10:39,448] [INFO] [datasets.<module>:95] [PID:4079] TensorFlow version 2.15.0 available.\n", + "[2024-01-27 22:10:39,449] [INFO] [datasets.<module>:108] [PID:4079] JAX version 0.4.23 available.\n", + "[2024-01-27 22:10:40,350] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + " dP dP dP \n", + " 88 88 88 \n", + " .d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88 \n", + " 88' `88 `8bd8' 88' `88 88 88' `88 88 88 \n", + " 88. .88 .d88b. 88. .88 88 88. .88 88 88 \n", + " `88888P8 dP' `dP `88888P' dP `88888P' dP dP \n", + " \n", + " \n", + "\n", + "[2024-01-27 22:10:41,928] [INFO] [axolotl.normalize_config:170] [PID:4079] [RANK:0] GPU memory usage baseline: 0.000GB (+0.255GB misc)\u001b[39m\n", + "[2024-01-27 22:10:41,928] [INFO] [axolotl.common.cli.load_model_and_tokenizer:49] [PID:4079] [RANK:0] loading tokenizer... TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\u001b[39m\n", + "[2024-01-27 22:10:42,234] [DEBUG] [axolotl.load_tokenizer:216] [PID:4079] [RANK:0] EOS: 2 / </s>\u001b[39m\n", + "[2024-01-27 22:10:42,234] [DEBUG] [axolotl.load_tokenizer:217] [PID:4079] [RANK:0] BOS: 1 / <s>\u001b[39m\n", + "[2024-01-27 22:10:42,234] [DEBUG] [axolotl.load_tokenizer:218] [PID:4079] [RANK:0] PAD: 2 / </s>\u001b[39m\n", + "[2024-01-27 22:10:42,234] [DEBUG] [axolotl.load_tokenizer:219] [PID:4079] [RANK:0] UNK: 0 / <unk>\u001b[39m\n", + "[2024-01-27 22:10:42,235] [INFO] [axolotl.load_tokenizer:230] [PID:4079] [RANK:0] No Chat template selected. 
Consider adding a chat template for easier inference.\u001b[39m\n", + "[2024-01-27 22:10:42,235] [INFO] [axolotl.common.cli.load_model_and_tokenizer:51] [PID:4079] [RANK:0] loading model and (optionally) peft_config...\u001b[39m\n", + "[2024-01-27 22:10:42,320] [INFO] [axolotl.load_model:372] [PID:4079] [RANK:0] patching _expand_mask\u001b[39m\n", + "[2024-01-27 22:10:45,770] [INFO] [axolotl.load_model:641] [PID:4079] [RANK:0] GPU memory usage after model load: 2.062GB (+0.087GB cache, +0.352GB misc)\u001b[39m\n", + "[2024-01-27 22:10:45,787] [INFO] [axolotl.load_model:685] [PID:4079] [RANK:0] converting modules to torch.float16 for flash attention\u001b[39m\n", + "[2024-01-27 22:10:45,791] [INFO] [axolotl.load_lora:797] [PID:4079] [RANK:0] found linear modules: ['q_proj', 'down_proj', 'v_proj', 'gate_proj', 'k_proj', 'up_proj', 'o_proj']\u001b[39m\n", + "[2024-01-27 22:10:45,791] [DEBUG] [axolotl.load_lora:816] [PID:4079] [RANK:0] Loading pretained PEFT - LoRA\u001b[39m\n", + "trainable params: 25,231,360 || all params: 1,125,279,744 || trainable%: 2.2422299996542017\n", + "[2024-01-27 22:10:46,372] [INFO] [axolotl.load_model:722] [PID:4079] [RANK:0] GPU memory usage after adapters: 2.590GB (+1.060GB cache, +0.368GB misc)\u001b[39m\n", + "[2024-01-27 22:10:46,372] [INFO] [axolotl.scripts.do_merge_lora:79] [PID:4079] [RANK:0] running merge of LoRA with base model\u001b[39m\n", + "Unloading and merging model: 100% 468/468 [00:00<00:00, 4211.50it/s]\n", + "[2024-01-27 22:10:46,490] [INFO] [axolotl.scripts.do_merge_lora:88] [PID:4079] [RANK:0] saving merged model to: qlora-out/merged\u001b[39m\n", + "\u001b[0m" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from huggingface_hub import HfApi\n", + "from google.colab import userdata\n", + "\n", + "new_model = \"mlabonne/TinyAlpaca\"\n", + "\n", + "# HF_TOKEN defined in the secrets tab in Google Colab\n", + "api = HfApi(token=userdata.get(\"HF_TOKEN\"))\n", + "\n", + "# Upload merge folder\n", + "api.create_repo(\n", + " repo_id=new_model,\n", + " repo_type=\"model\",\n", + " exist_ok=True,\n", + ")\n", + "api.upload_folder(\n", + " repo_id=new_model,\n", + " folder_path=\"qlora-out/merged\",\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 149, + "referenced_widgets": [ + "76eab4e675b34a5486f28d16183658b6", + "af7f38e810a64e5f98dd3f33672b8d57", + "0e196628e7e64879bcc7184f67404127", + "2ee0d001c05a4b89ab7a3cbea71c5960", + "8a59f96ead334ba8b6848321fbde0811", + "3db6a3ae89ec483783f97d77aef5e5a7", + "f589d24de591428cb878405d1eb6034d", + "e4520b22ca934c1fbe399131c021d0e8", + "cb663cc62fc94108855d9b5509b573d8", + "9926acc37862427d9fdd1ef37474d0c9", + "215f84842ac5451a8f868e0dc3090a71", + "0878f651a6b64ef495ebd5a2a9084b63", + "eec99342bca04c1697ec43bc099c22c9", + "cb7cc89f3edf4c6c8e38043ef0d697ba", + "7a9497ed06004a82b2893e0f5ab03736", + "f50ea1d3c5874926a6db2af3d9d795ab", + "7851c929f5f64e31856d3793b963130e", + "49f8123c3c5e43ababed7e09e97cb7d2", + "c4dfec3f687241d6a69cf30252ada077", + "a1c9ecf2bc124658a08f585b1bf309e6", + "101d0b228b1943ff9c9d69fd6f4847ee", + "b03c7fa74c774a39946636803e086218", + "90726b0049c344c19fb5c9ef1dcf0628", + "bebef38ee9e24586a7f894ffaf1ae43e", + "9d90c5ea423e4586bc3bb303bcaa4b03", + "d417d5bf00294e3f9cb7e78b5d83ee03", + "68eac408135d4d048de89fa674b701da", + "34c11d3b1a3441bf8533f7a6f057d0cf", + "6270a81b7f5749d7a656c1edaaa63118", + "e1de7a96086c4351bba99bbd4d905802", + "6d28893d0a054d6aa2726ee94ee0d880", + "91c9822b8ef14074bd360bb5d2998267", + "8673edee8ab5430588daba8d422999eb" + ] + }, + "id": 
"NsvZw3B7-gfm", + "outputId": "68c10dec-ee7a-4c0d-dd8d-c6631b53f4b3" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "pytorch_model.bin: 0%| | 0.00/2.20G [00:00