Initial commit
commit bd884491e4
.gitattributes
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
.gitignore
@@ -0,0 +1,8 @@
_non
config/loras/*
config/models/*
config/presets/*
config/prompts/*
config/softprompts/*
config/training/*
text-generation-webui/
Dockerfile
@@ -0,0 +1,91 @@
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS env_base
# Pre-reqs
RUN apt-get update && apt-get install --no-install-recommends -y \
    git vim build-essential python3-dev python3-venv python3-pip
# Instantiate venv and pre-activate
RUN pip3 install virtualenv
RUN virtualenv /venv
# Credit, Itamar Turner-Trauring: https://pythonspeed.com/articles/activate-virtualenv-dockerfile/
ENV VIRTUAL_ENV=/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip3 install --upgrade pip setuptools && \
    pip3 install torch torchvision torchaudio

FROM env_base AS app_base
### DEVELOPERS/ADVANCED USERS ###
# Clone oobabooga/text-generation-webui
RUN git clone https://github.com/oobabooga/text-generation-webui /src
# To use local source: comment out the git clone command then set the build arg `LCL_SRC_DIR`
#ARG LCL_SRC_DIR="text-generation-webui"
#COPY ${LCL_SRC_DIR} /src
#################################
# Copy source to app
RUN cp -ar /src /app
# Install oobabooga/text-generation-webui
RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r /app/requirements.txt
# Install extensions
COPY ./scripts/build_extensions.sh /scripts/build_extensions.sh
RUN --mount=type=cache,target=/root/.cache/pip \
    chmod +x /scripts/build_extensions.sh && . /scripts/build_extensions.sh
# Clone default GPTQ
RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda /app/repositories/GPTQ-for-LLaMa
# Build and install default GPTQ ('quant_cuda')
ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install

FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 AS base
# Runtime pre-reqs
RUN apt-get update && apt-get install --no-install-recommends -y \
    python3-venv git
# Copy app and src
COPY --from=app_base /app /app
COPY --from=app_base /src /src
# Copy and activate venv
COPY --from=app_base /venv /venv
ENV VIRTUAL_ENV=/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Finalise app setup
WORKDIR /app
EXPOSE 7860
EXPOSE 5000
EXPOSE 5005
# Required for Python print statements to appear in logs
ENV PYTHONUNBUFFERED=1
# Run
COPY ./scripts/docker-entrypoint.sh /scripts/docker-entrypoint.sh
RUN chmod +x /scripts/docker-entrypoint.sh
ENTRYPOINT ["/scripts/docker-entrypoint.sh"]

# VARIANT BUILDS
FROM base AS cuda
RUN echo "CUDA" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev python3-pip
RUN rm -rf /app/repositories/GPTQ-for-LLaMa && \
    git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b cuda /app/repositories/GPTQ-for-LLaMa
RUN pip3 uninstall -y quant-cuda && \
    pip3 install -r /app/repositories/GPTQ-for-LLaMa/requirements.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

FROM base AS triton
RUN echo "TRITON" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
RUN rm -rf /app/repositories/GPTQ-for-LLaMa && \
    git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b triton /app/repositories/GPTQ-for-LLaMa
RUN pip3 uninstall -y quant-cuda && \
    pip3 install -r /app/repositories/GPTQ-for-LLaMa/requirements.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

FROM base AS default
RUN echo "DEFAULT" >> /variant.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]
README.md
@@ -0,0 +1,95 @@
# Introduction
This project dockerises the deployment of [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) and its variants. It provides a default configuration (corresponding to a vanilla deployment of the application) as well as pre-configured support for other set-ups (e.g., the more recent `triton` and `cuda` branches of GPTQ).

*The goal of this project is to be to [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) what [AbdBarho/stable-diffusion-webui-docker](https://github.com/AbdBarho/stable-diffusion-webui-docker) is to [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui).*

# Usage
*This project currently supports Linux as the deployment platform.*

## Pre-Requisites
- docker
- docker compose
- CUDA docker runtime
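
To verify that the CUDA docker runtime is working, a quick smoke test is to run `nvidia-smi` from inside a CUDA container (a hedged example; any CUDA base image available on your host will do):

`docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi`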

## Docker Compose
This is the recommended deployment method.

### Select variant
Choose the desired variant by setting the build `target` in `docker-compose.yml` to one of the following options (an example snippet follows the table):

| Variant | Description |
|---|---|
| `default` | Minimal implementation of the default deployment from source. |
| `triton` | Updated GPTQ using the latest `triton` branch from `qwopqwop200/GPTQ-for-LLaMa`. Suitable for Linux only. |
| `cuda` | Updated GPTQ using the latest `cuda` branch from `qwopqwop200/GPTQ-for-LLaMa`. |

*See: [oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md) for more information on variants.*
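
For example, selecting the `triton` variant is a one-line change to the `build` section of `docker-compose.yml` (a minimal sketch showing only the relevant keys):

```yml
services:
  text-generation-webui-docker:
    build:
      context: .
      target: triton # Changed from `default`; rebuild the image afterwards
```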

### Build
Build the image:

`docker compose build`

*If you choose a different variant later, you must **rebuild** the image.*

### Deploy
Deploy the service:

`docker compose up`
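
To run the service in the background instead, the standard detached flag applies:

`docker compose up -d`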

### Remove
Remove the service:

`docker compose down -v`

## Configuration
These configuration instructions describe the relevant details for this docker wrapper. Refer to the [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) documentation for usage of the application itself.

### Ports
Three commonly used ports are exposed (see the compose snippet below for enabling them):

| Port | Description |
| ---- | ----------- |
| `7860` | Web UI port |
| `5000` | API port |
| `5005` | Streaming port |

*Extensions may use additional ports - check the application documentation for more details.*
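
Only the web UI port is published by default; the API and streaming ports can be enabled by uncommenting their entries in the `ports` section of `docker-compose.yml`:

```yml
ports:
  - 7860:7860 # Default web port
  - 5000:5000 # Default API port
  - 5005:5005 # Default streaming port
```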

### Volumes
The provided example docker compose maps several volumes from the local `config` directory into the container: `loras`, `models`, `presets`, `prompts`, `softprompts`, and `training`. If these folders are empty, they will be initialised when the container is run.

*If you are getting an error about missing files, try clearing these folders and letting the service re-populate them.*
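
The host-side folders can also be created up front (a convenience sketch; the entrypoint populates any that are empty on first run):

`mkdir -p config/{loras,models,presets,prompts,softprompts,training}`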

### Extra launch arguments
Extra launch arguments can be defined in the environment variable `EXTRA_LAUNCH_ARGS` (e.g., `"--model MODEL_NAME"` to load a model at launch). The provided defaults are `--verbose` and `--listen` (which makes the web UI available on your local network); these are set in `docker-compose.yml`.

*Launch arguments should be defined as a space-separated list, just like writing them on the command line. These arguments are passed to the `server.py` module.*
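
For example, to load a model at start-up alongside the provided defaults, the `environment` entry in `docker-compose.yml` might look like the following (`MODEL_NAME` is a placeholder for a model folder under `config/models`):

```yml
environment:
  - EXTRA_LAUNCH_ARGS="--listen --verbose --model MODEL_NAME"
```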

### Updates
These projects are moving quickly! To update to the latest version, rebuild the image without cache:

`docker compose build --no-cache`

*When the container is launched, it will print out how many commits behind origin the current build is, so you can decide if you want to update it.*
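
A typical update cycle, assuming the stack is currently running, is to stop, rebuild, and relaunch:

`docker compose down && docker compose build --no-cache && docker compose up`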

### Developers / Advanced Users
The Dockerfile can be easily modified to compile and run the application from a local source folder. This is useful if you want to do some development or run a custom version. See the Dockerfile itself for instructions on how to do this.

*Support is not provided for this deployment pathway. It is assumed that you are competent and willing to do your own debugging! Pro-tip: start by placing a `text-generation-webui` repo into the project folder.*

## Standalone Container
This is NOT recommended; the instructions are included for completeness.

### Build
Build the image for the default target:

`docker build --target default -t text-generation-webui:local .`

### Run
Run a container (and destroy it upon completion):

`docker run -it --rm -p 7860:7860 text-generation-webui:local`
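
To give the standalone container GPU access, a device request can be added (assuming the NVIDIA Container Toolkit is installed on the host):

`docker run -it --rm --gpus all -p 7860:7860 text-generation-webui:local`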

# Contributions
Contributions are welcome - please feel free to submit a PR. More variants (e.g., AMD/ROCm support) and Windows support can help lower the barrier to entry, make this technology accessible to as many people as possible, and push towards democratising a technology that is having severe impacts on our society.
docker-compose.yml
@@ -0,0 +1,35 @@
version: "3"
services:
  text-generation-webui-docker:
    build:
      context: .
      target: default # Specify the variant to build
      # args:
      #   - LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
    container_name: text-generation-webui
    environment:
      - EXTRA_LAUNCH_ARGS="--listen --verbose" # Custom launch args (e.g., --model MODEL_NAME)
    ports:
      - 7860:7860 # Default web port
      # - 5000:5000 # Default API port
      # - 5005:5005 # Default streaming port
      # - 5001:5001 # Default OpenAI API extension port
    volumes:
      - ./config/loras:/app/loras
      - ./config/models:/app/models
      - ./config/presets:/app/presets
      - ./config/prompts:/app/prompts
      - ./config/softprompts:/app/softprompts
      - ./config/training:/app/training
    logging:
      driver: json-file
      options:
        max-file: "3" # Number of rotated log files to keep
        max-size: "10m" # Maximum size per log file
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
scripts/build_extensions.sh
@@ -0,0 +1,25 @@
#!/bin/bash

# Specify the directory containing the top-level folders
directory="/app/extensions"

# Iterate over the top-level folders
for folder in "$directory"/*; do
    if [ -d "$folder" ]; then
        # Change directory to the current folder (skip it if cd fails)
        cd "$folder" || continue

        # Check if requirements.txt file exists
        if [ -f "requirements.txt" ]; then
            echo "Installing requirements in $folder..."
            pip3 install -r requirements.txt
            echo "Requirements installed in $folder"
        else
            echo "Skipping $folder: requirements.txt not found"
        fi

        # Change back to the original directory
        cd "$directory"
    fi
done
scripts/docker-entrypoint.sh
@@ -0,0 +1,45 @@
#!/bin/bash

# Function to handle keyboard interrupt
function ctrl_c {
    echo -e "\nKilling container!"
    # Add your cleanup actions here
    exit 0
}
# Register the keyboard interrupt handler
trap ctrl_c SIGTERM SIGINT SIGQUIT SIGHUP

# Generate default configs if empty
CONFIG_DIRECTORIES=("loras" "models" "presets" "prompts" "softprompts" "training")
for config_dir in "${CONFIG_DIRECTORIES[@]}"; do
    if [ -z "$(ls -A /app/"$config_dir")" ]; then
        cp -ar /src/"$config_dir" /app/
        chown -R 1000:1000 /app/"$config_dir" # Not ideal... but convenient.
    fi
done

# Print variant
VARIANT=$(cat /variant.txt)
echo "=== Running text-generation-webui variant: '$VARIANT' ==="

# Print version freshness
cur_dir=$(pwd)
src_dir="/src"
cd "$src_dir"
if git fetch origin >/dev/null 2>&1; then
    # The fetch succeeded; count how far behind origin this build is
    COMMITS_BEHIND=$(git rev-list HEAD..origin --count)
else
    # The fetch failed (e.g., no network access)
    COMMITS_BEHIND="UNKNOWN"
fi
echo "=== (This version is $COMMITS_BEHIND commits behind origin) ==="
cd "$cur_dir"

# Assemble CMD and extra launch args
eval "extra_launch_args=($EXTRA_LAUNCH_ARGS)"
# Deliberately left unquoted: the space-separated launch args must split into separate words
LAUNCHER=($@ $extra_launch_args)

# Launch the server with ${CMD[@]} + ${EXTRA_LAUNCH_ARGS[@]}
"${LAUNCHER[@]}"