Add Habana Gaudi (HPU) Support & Performance Benchmarks for Khoj #1125

Open · wants to merge 9 commits into base: master
72 changes: 72 additions & 0 deletions Dockerfile.hpu
@@ -0,0 +1,72 @@
# syntax=docker/dockerfile:1
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest AS base
LABEL homepage="https://khoj.dev"
LABEL repository="https://github.com/khoj-ai/khoj"
LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
LABEL org.opencontainers.image.description="Your second brain, containerized for personal, local deployment."

# Install System Dependencies
RUN apt update -y && apt -y install \
    python3-pip \
    tzdata \
    swig \
    curl \
    # Required by RapidOCR
    libgl1 \
    libglx-mesa0 \
    libglib2.0-0 \
    # Required by llama-cpp-python pre-built wheels. See #1628
    musl-dev && \
    ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
    # Clean up
    apt clean && rm -rf /var/lib/apt/lists/*

# Build Server
FROM base AS server-deps
WORKDIR /app
COPY pyproject.toml .
COPY README.md .
ARG VERSION=0.0.0

# Set environment variables for Habana
ENV HABANA_VISIBLE_DEVICES=all
Member commented:

This seems to be a required runtime environment variable to enable Habana HPU? If so, it should just be mentioned in the Khoj setup docs under the HPU tab. See /documentation/docs/get-started/setup.mdx for reference.

ENV OMPI_MCA_btl_vader_single_copy_mechanism=none
ENV PT_HPU_LAZY_ACC_PAR_MODE=0
ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=1
Comment on lines +33 to +35
Member commented:

These seem like optional runtime variables to configure Habana support, based on the Habana docs?

If these runtime env vars are the only change this Dockerfile.hpu adds, we can drop the Dockerfile.hpu file and just mention in our setup documentation that folks wanting to run Khoj on Habana HPU can set these (and other required) environment variables for their setup by referring to the Habana documentation before starting Khoj?
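
For illustration, if `Dockerfile.hpu` were dropped, a deployer could export these variables from a small launch wrapper instead. A minimal sketch, assuming the values from this diff and the repo's `python3 src/khoj/main.py` entrypoint:

```python
import os
import subprocess

# Values taken from this PR's Dockerfile.hpu; check the Habana docs for
# which variables your Gaudi setup actually requires.
hpu_env = {
    "HABANA_VISIBLE_DEVICES": "all",
    "OMPI_MCA_btl_vader_single_copy_mechanism": "none",
    "PT_HPU_LAZY_ACC_PAR_MODE": "0",
    "PT_HPU_ENABLE_LAZY_COLLECTIVES": "1",
}
os.environ.update(hpu_env)

# Start Khoj the same way the Dockerfile's entrypoint does.
subprocess.run(["python3", "src/khoj/main.py"], check=True)
```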



# use the pre-built llama-cpp-python, torch cpu wheel
ENV PIP_EXTRA_INDEX_URL="https://abetlen.github.io/llama-cpp-python/whl/cpu"
# avoid downloading unused cuda specific python packages
ENV CUDA_VISIBLE_DEVICES=""
RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
    pip install --no-cache-dir .

# Build Web App
FROM node:20-alpine AS web-app
# Set build optimization env vars
ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1
WORKDIR /app/src/interface/web
# Install dependencies first (cache layer)
COPY src/interface/web/package.json src/interface/web/yarn.lock ./
RUN yarn install --frozen-lockfile
# Copy source and build
COPY src/interface/web/. ./
RUN yarn build

# Merge the Server and Web App into a Single Image
FROM base
ENV PYTHONPATH=/app/src
WORKDIR /app
COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
COPY . .
RUN cd src && python3 khoj/manage.py collectstatic --noinput

# Run the Application
# There are more arguments required for the application to run,
# but those should be passed in through the docker-compose.yml file.
ARG PORT
EXPOSE ${PORT}
ENTRYPOINT ["python3", "src/khoj/main.py"]
6 changes: 6 additions & 0 deletions README.md
@@ -67,6 +67,12 @@ You can see the full feature list [here](https://docs.khoj.dev/category/features)

To get started with self-hosting Khoj, [read the docs](https://docs.khoj.dev/get-started/setup).

## 🚀 HPU (Habana Processing Unit) Support

We now support running Khoj on **Habana Gaudi accelerators (HPUs)**! This lets you use Habana's AI processors for faster, more efficient model inference.

For more information, see [here](src/khoj/app/README.md#-hpu-support).

## Enterprise

Khoj is available as a cloud service, on-premises, or as a hybrid solution. To learn more about Khoj Enterprise, [visit our website](https://khoj.dev/teams).
1 change: 1 addition & 0 deletions pyproject.toml
@@ -91,6 +91,7 @@ dependencies = [
"google-generativeai == 0.8.3",
"pyjson5 == 1.6.7",
"resend == 1.0.1",
"optimum-habana == 1.14.1",
"email-validator == 2.2.0",
]
dynamic = ["version"]
48 changes: 48 additions & 0 deletions src/khoj/app/README.md
Member commented:
Updates to this should instead be moved to a new HPU tab under the Khoj setup docs at /documentation/docs/get-started/setup.mdx (which maps to https://docs.khoj.dev/get-started/setup/).

@@ -92,3 +92,51 @@ While we're using Django for the ORM, we're still using the FastAPI server for t
```bash
python3 src/khoj/main.py --anonymous-mode
```


## 🚀 HPU Support
### 🛠️ Setup for HPU

To run Khoj on a Habana Gaudi device, follow these steps:

1. **Build the HPU Docker Image**:
Use the provided `Dockerfile.hpu` to build a Docker image optimized for HPU:
```bash
docker build -t khoj-hpu -f Dockerfile.hpu .
```

Comment on lines +102 to +107
Member commented:

This may not be required if the previous comments on Dockerfile.hpu are valid. Folks can just use the default Khoj Dockerfile or image.

2. **Run the Docker Container**:
Start the container with the appropriate environment variables for HPU:
```bash
docker run --runtime=habana -e HABANA_VISIBLE_DEVICES=all -p <PORT>:<PORT> khoj-hpu
```
Replace `<PORT>` with the port number you want to expose.

3. **Verify HPU Support**:
Ensure that the application detects the HPU device by checking the logs; the application will automatically use the HPU if available. A quick manual check is sketched below.
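
A minimal sketch of such a check, mirroring the detection logic this PR adds in `src/khoj/utils/helpers.py` (run it inside the container):

```python
# Hedged sketch: uses the same habana_frameworks loader as this PR's helpers.py.
import importlib.util

import torch

if importlib.util.find_spec("habana_frameworks") is not None:
    from habana_frameworks.torch.utils.library_loader import load_habana_module

    load_habana_module()  # registers the 'hpu' device type with torch
    print("HPU available:", torch.hpu.is_available())
else:
    print("habana_frameworks not installed; Khoj will fall back to CUDA/MPS/CPU")
```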

### 📦 New Dependencies

To support HPU and other advanced features, we've added the following dependencies:

- **`optimum-habana`**: Optimizes models for Habana Gaudi accelerators.
- **`torch-geometric`**: Enables deep learning on graph-based data structures.
- **`numba`**: Accelerates Python code by compiling it to machine code at runtime.
Comment on lines +122 to +124
Member commented:

Seems like the only dependency explicitly added is optimum-habana (in pyproject.toml)?


These dependencies are automatically installed when you build the Docker image or install the project locally.

### 🧠 Device Selection

The application now supports multiple device types, including **CUDA**, **HPU**, **MPS** (Apple Silicon), and **CPU**. You can specify your preferred device by passing the `preferred_device` argument to the `get_device()` function in `helpers.py`. For example:

```python
device = get_device(preferred_device="hpu") # Use HPU if available
```

If no preferred device is specified, the application will automatically select the best available device.
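
Putting the two helpers together, a short usage sketch (the `khoj.utils.helpers` module path is assumed from this repo's `src/` layout):

```python
from khoj.utils.helpers import get_device, get_device_memory

device = get_device()  # auto-selects hpu > cuda > mps > cpu
total_bytes = get_device_memory()  # raw bytes on the CUDA/HPU/CPU paths in this diff
print(f"Using {device.type} with {total_bytes / 1024**3:.1f} GiB of memory")
```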

Comment on lines +128 to +137
Member commented:

This isn't used and isn't relevant to Khoj users or deployers, so it should be removed from the documentation.

### 📝 Notes

- Ensure that your system has the necessary Habana drivers and software stack installed to use HPUs.
- For more information on Habana Gaudi accelerators, visit the [Habana Labs documentation](https://docs.habana.ai/).

32 changes: 26 additions & 6 deletions src/khoj/utils/helpers.py
@@ -2,6 +2,7 @@

  import copy
  import datetime
+ import importlib
  import io
  import ipaddress
  import logging
@@ -301,6 +302,8 @@ def log_telemetry(
  def get_device_memory() -> int:
      """Get device memory in GB"""
      device = get_device()
+     if device.type == "hpu":
+         return torch.hpu.get_device_properties(device).total_memory
      if device.type == "cuda":
          return torch.cuda.get_device_properties(device).total_memory
      elif device.type == "mps":

@@ -309,15 +312,32 @@ def get_device_memory() -> int:
      return psutil.virtual_memory().total


- def get_device() -> torch.device:
-     """Get device to run model on"""
+ def get_device(preferred_device=None) -> torch.device:
Member commented:

The preferred_device arg seems unused. Should we remove it?

"""
Determine the appropriate device to use (cuda, hpu, or cpu).
Args:
preferred_device (str): User-preferred device ('cuda', 'hpu', or 'cpu').
Returns:
torch.device: 'cuda', 'hpu', 'mps' or 'cpu'.
"""
# Check for HPU support
if importlib.util.find_spec("habana_frameworks") is not None:
from habana_frameworks.torch.utils.library_loader import load_habana_module

load_habana_module()
if torch.hpu.is_available():
if preferred_device is None or "hpu" in preferred_device:
return torch.device("hpu")
# Use CUDA GPU if available
if torch.cuda.is_available():
# Use CUDA GPU
return torch.device("cuda:0")
if preferred_device is None or "cuda" in preferred_device:
return torch.device("cuda:0")
# Use Apple M1 Metal Acceleration if available
elif torch.backends.mps.is_available():
# Use Apple M1 Metal Acceleration
return torch.device("mps")
if preferred_device is None or "mps" in preferred_device:
return torch.device("mps")
else:
# Default to CPU
return torch.device("cpu")
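
For reference, a hedged example of calling the new signature (nothing in this PR appears to call it yet, per the comment above). Note that, as written, a preference that does not match the available hardware can fall through these branches without returning a device:

```python
import torch

from khoj.utils.helpers import get_device

# Ask for HPU explicitly; on a Gaudi machine this should return torch.device("hpu").
device = get_device(preferred_device="hpu")
embeddings = torch.zeros((2, 384), device=device)  # hypothetical embedding buffer
print(embeddings.device)
```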

