diff --git a/Dockerfile.hpu b/Dockerfile.hpu
new file mode 100644
index 000000000..89456c581
--- /dev/null
+++ b/Dockerfile.hpu
@@ -0,0 +1,72 @@
+# syntax=docker/dockerfile:1
+FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest AS base
+LABEL homepage="https://khoj.dev"
+LABEL repository="https://github.com/khoj-ai/khoj"
+LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
+LABEL org.opencontainers.image.description="Your second brain, containerized for personal, local deployment."
+
+# Install System Dependencies
+RUN apt update -y && apt -y install \
+    python3-pip \
+    tzdata \
+    swig \
+    curl \
+    # Required by RapidOCR
+    libgl1 \
+    libglx-mesa0 \
+    libglib2.0-0 \
+    # Required by llama-cpp-python pre-built wheels. See #1628
+    musl-dev && \
+    ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
+    # Clean up
+    apt clean && rm -rf /var/lib/apt/lists/*
+
+# Build Server
+FROM base AS server-deps
+WORKDIR /app
+COPY pyproject.toml .
+COPY README.md .
+ARG VERSION=0.0.0
+
+# Set environment variables for Habana
+ENV HABANA_VISIBLE_DEVICES=all
+ENV OMPI_MCA_btl_vader_single_copy_mechanism=none
+ENV PT_HPU_LAZY_ACC_PAR_MODE=0
+ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=1
+
+# Use the pre-built llama-cpp-python, torch cpu wheel
+ENV PIP_EXTRA_INDEX_URL="https://abetlen.github.io/llama-cpp-python/whl/cpu"
+# Avoid downloading unused cuda-specific python packages
+ENV CUDA_VISIBLE_DEVICES=""
+RUN sed -i "s/dynamic = \[\"version\"\]/version = \"$VERSION\"/" pyproject.toml && \
+    pip install --no-cache-dir .
+
+# Build Web App
+FROM node:20-alpine AS web-app
+# Set build optimization env vars
+ENV NODE_ENV=production
+ENV NEXT_TELEMETRY_DISABLED=1
+WORKDIR /app/src/interface/web
+# Install dependencies first (cache layer)
+COPY src/interface/web/package.json src/interface/web/yarn.lock ./
+RUN yarn install --frozen-lockfile
+# Copy source and build
+COPY src/interface/web/. ./
+RUN yarn build
+
+# Merge the Server and Web App into a Single Image
+FROM base
+ENV PYTHONPATH=/app/src
+WORKDIR /app
+COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
+COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
+COPY . .
+RUN cd src && python3 khoj/manage.py collectstatic --noinput
+
+# Run the Application
+# There are more arguments required for the application to run,
+# but those should be passed in through the docker-compose.yml file.
+ARG PORT
+EXPOSE ${PORT}
+ENTRYPOINT ["python3", "src/khoj/main.py"]
diff --git a/README.md b/README.md
index 5a9681289..d2d93cb32 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,12 @@ You can see the full feature list [here](https://docs.khoj.dev/category/features

 To get started with self-hosting Khoj, [read the docs](https://docs.khoj.dev/get-started/setup).

+## 🚀 HPU (Habana Processing Unit) Support
+
+We now support running Khoj on **Habana Gaudi accelerators (HPUs)**! This lets you use Habana's AI processors for faster, more efficient model inference.
+
+For more information, see [here](src/khoj/app/README.md#-hpu-support).
+
 ## Enterprise

 Khoj is available as a cloud service, on-premises, or as a hybrid solution. To learn more about Khoj Enterprise, [visit our website](https://khoj.dev/teams).
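Before wiring Khoj itself into the container, it can help to confirm that the Habana stack actually loads in an image built from `Dockerfile.hpu`. The following is a minimal sketch, not part of this diff: the script name `hpu_check.py` is hypothetical, and it reuses the same detection calls the patched `get_device()` in `src/khoj/utils/helpers.py` (further below) relies on.

```python
# hpu_check.py: minimal sketch to confirm the Habana stack loads in the container.
# Mirrors the detection logic added to src/khoj/utils/helpers.py in this diff.
import importlib.util

import torch

if importlib.util.find_spec("habana_frameworks") is not None:
    # Registers the "hpu" device type with PyTorch before torch.hpu is queried.
    from habana_frameworks.torch.utils.library_loader import load_habana_module

    load_habana_module()
    print("HPU available:", torch.hpu.is_available())
else:
    print("habana_frameworks not installed; Khoj will fall back to CUDA/MPS/CPU.")
```

Running a script like this inside the container (with the entrypoint overridden) surfaces driver or wheel problems before the full application starts.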
diff --git a/pyproject.toml b/pyproject.toml
index 1ed9426e1..6ccd275b1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,6 +91,7 @@ dependencies = [
     "google-generativeai == 0.8.3",
     "pyjson5 == 1.6.7",
     "resend == 1.0.1",
+    "optimum-habana == 1.14.1",
     "email-validator == 2.2.0",
 ]
 dynamic = ["version"]
diff --git a/src/khoj/app/README.md b/src/khoj/app/README.md
index 14fc8501e..775eb7a2c 100644
--- a/src/khoj/app/README.md
+++ b/src/khoj/app/README.md
@@ -92,3 +92,51 @@ While we're using Django for the ORM, we're still using the FastAPI server for t
 ```bash
 python3 src/khoj/main.py --anonymous-mode
 ```
+
+## 🚀 HPU Support
+
+### 🛠️ Setup for HPU
+
+To run Khoj on a Habana Gaudi device, follow these steps:
+
+1. **Build the HPU Docker Image**:
+   Use the provided `Dockerfile.hpu` to build a Docker image optimized for HPU:
+   ```bash
+   docker build -t khoj-hpu -f Dockerfile.hpu .
+   ```
+
+2. **Run the Docker Container**:
+   Start the container with the appropriate environment variables for HPU:
+   ```bash
+   docker run --runtime=habana -e HABANA_VISIBLE_DEVICES=all -p <PORT>:<PORT> khoj-hpu
+   ```
+   Replace `<PORT>` with the port number you want to expose.
+
+3. **Verify HPU Support**:
+   Check the application logs to confirm that an HPU device was detected. The application automatically uses the HPU when one is available.
+
+### 📦 New Dependencies
+
+To support HPU devices, this change adds the following dependency:
+
+- **`optimum-habana`**: Optimizes model execution for Habana Gaudi accelerators.
+
+It is installed automatically when you build the Docker image or install the project locally.
+
+### 🧠 Device Selection
+
+The application now supports multiple device types: **CUDA**, **HPU**, **MPS** (Apple Silicon), and **CPU**. You can specify your preferred device by passing the `preferred_device` argument to the `get_device()` function in `helpers.py`. For example:
+
+```python
+device = get_device(preferred_device="hpu")  # Use HPU if available
+```
+
+If no preferred device is specified, the application automatically selects the best available device.
+
+### 📝 Notes
+
+- Ensure that your system has the necessary Habana drivers and software stack installed to use HPUs.
+- For more information on Habana Gaudi accelerators, visit the [Habana Labs documentation](https://docs.habana.ai/).
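The device helpers documented above can also drive device-aware configuration at startup. Below is a hypothetical sketch of such usage, not part of this diff: the threshold and model names are illustrative, and it assumes `get_device_memory()` yields a byte count (the `total_memory` and `psutil` values it reads are bytes, despite the "in GB" docstring).

```python
# Hypothetical usage of the helpers patched in this diff; the threshold and
# model names are illustrative, not part of Khoj's actual configuration.
from khoj.utils.helpers import get_device, get_device_memory

device = get_device(preferred_device="hpu")  # falls back if no HPU is present
memory_gib = get_device_memory() / 2**30     # underlying values are in bytes

# Prefer a larger model when the selected device has ample memory.
model_size = "large" if memory_gib >= 16 else "small"
print(f"device={device.type}, memory={memory_gib:.1f} GiB, model={model_size}")
```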
+ """ + # Check for HPU support + if importlib.util.find_spec("habana_frameworks") is not None: + from habana_frameworks.torch.utils.library_loader import load_habana_module + + load_habana_module() + if torch.hpu.is_available(): + if preferred_device is None or "hpu" in preferred_device: + return torch.device("hpu") + # Use CUDA GPU if available if torch.cuda.is_available(): - # Use CUDA GPU - return torch.device("cuda:0") + if preferred_device is None or "cuda" in preferred_device: + return torch.device("cuda:0") + # Use Apple M1 Metal Acceleration if available elif torch.backends.mps.is_available(): - # Use Apple M1 Metal Acceleration - return torch.device("mps") + if preferred_device is None or "mps" in preferred_device: + return torch.device("mps") else: + # Default to CPU return torch.device("cpu")