Merged
4 changes: 2 additions & 2 deletions .devcontainer/Dockerfile
@@ -18,8 +18,8 @@ ENV PATH="/opt/venv/bin:${PATH}"
COPY pyproject.toml README.md ./

# Setup python environment
# Use the pre-built llama-cpp-python, torch cpu wheel
ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu" \
# Use the pre-built torch cpu wheel
ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" \
# Avoid downloading unused cuda specific python packages
CUDA_VISIBLE_DEVICES="" \
# Use static version to build app without git dependency
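The Dockerfile change above keeps CPU-only wheels by pointing pip at PyTorch's CPU index. For a local, non-Docker build, the same effect can be sketched as a shell config fragment; this is an illustration of the environment variables the Dockerfile sets, not an official install recipe:

```shell
# Sketch: reproduce the Dockerfile's CPU-only wheel selection outside Docker.
# pip consults PIP_EXTRA_INDEX_URL in addition to PyPI when resolving wheels.
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
# Avoid downloading unused CUDA-specific python packages at runtime
export CUDA_VISIBLE_DEVICES=""
```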
2 changes: 0 additions & 2 deletions .github/workflows/test.yml
@@ -64,8 +64,6 @@ jobs:
DEBIAN_FRONTEND: noninteractive
run: |
apt update && apt install -y git libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
# required by llama-cpp-python prebuilt wheels
apt install -y musl-dev && ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1

- name: ⬇️ Install Postgres
env:
2 changes: 1 addition & 1 deletion documentation/docs/advanced/admin.md
@@ -20,7 +20,7 @@ Add all the agents you want to use for your different use-cases like Writer, Res
### Chat Model Options
Add all the chat models you want to try, use and switch between for your different use-cases. For each chat model you add:
- `Chat model`: The name of an [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://docs.anthropic.com/en/docs/about-claude/models#model-names), [Gemini](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models) or [Offline](https://huggingface.co/models?pipeline_tag=text-generation&library=gguf) chat model.
- `Model type`: The chat model provider like `OpenAI`, `Offline`.
- `Model type`: The chat model provider like `OpenAI`, `Google`.
- `Vision enabled`: Set to `true` if your model supports vision. This is currently only supported for vision-capable OpenAI models like `gpt-4o`.
- `Max prompt size`, `Subscribed max prompt size`: These are optional fields. They are used to truncate the context to the maximum context size that can be passed to the model. This can help with accuracy and cost-saving.<br />
- `Tokenizer`: This is an optional field. It is used to accurately count tokens and truncate the context passed to the chat model to stay within the model's max prompt size.
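The `Max prompt size` truncation described above can be illustrated with a minimal sketch. This is not Khoj's actual implementation; it uses a naive whitespace split as a stand-in for the configured tokenizer, purely to show the idea of trimming context to a token budget:

```python
def truncate_context(context: str, max_prompt_size: int) -> str:
    """Trim context to at most max_prompt_size tokens.

    Illustrative only: a real setup would count tokens with the model's
    configured tokenizer rather than splitting on whitespace.
    """
    tokens = context.split()  # stand-in for a real tokenizer
    if len(tokens) <= max_prompt_size:
        return context
    # Keep the most recent tokens, since chat context usually drops the oldest first
    return " ".join(tokens[-max_prompt_size:])
```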
23 changes: 8 additions & 15 deletions documentation/docs/get-started/setup.mdx
@@ -18,10 +18,6 @@ import TabItem from '@theme/TabItem';
These are the general setup instructions for self-hosted Khoj.
You can install the Khoj server using either [Docker](?server=docker) or [Pip](?server=pip).

:::info[Offline Model + GPU]
To use the offline chat model with your GPU, we recommend using the Docker setup with Ollama. You can also use the local Khoj setup via the Python package directly.
:::

:::info[First Run]
Restart your Khoj server after the first run to ensure all settings are applied correctly.
:::
@@ -225,10 +221,6 @@ To start Khoj automatically in the background use [Task scheduler](https://www.w
You can now open the web app at http://localhost:42110 and start interacting!<br />
Nothing else is necessary, but you can customize your setup further by following the steps below.

:::info[First Message to Offline Chat Model]
The offline chat model gets downloaded when you first send a message to it. The download can take a few minutes! Subsequent messages should be faster.
:::

### Add Chat Models
<h4>Login to the Khoj Admin Panel</h4>
Go to http://localhost:42110/server/admin and login with the admin credentials you setup during installation.
@@ -301,13 +293,14 @@ Offline chat stays completely private and can work without internet using any op
- An Nvidia or AMD GPU, or a Mac M1+ machine would significantly speed up chat responses
:::

1. Get the name of your preferred chat model from [HuggingFace](https://huggingface.co/models?pipeline_tag=text-generation&library=gguf). *Most GGUF format chat models are supported*.
2. Open the [create chat model page](http://localhost:42110/server/admin/database/chatmodel/add/) on the admin panel
3. Set the `chat-model` field to the name of your preferred chat model
- Make sure the `model-type` is set to `Offline`
4. Set the newly added chat model as your preferred model in your [User chat settings](http://localhost:42110/settings) and [Server chat settings](http://localhost:42110/server/admin/database/serverchatsettings/).
5. Restart the Khoj server and [start chatting](http://localhost:42110) with your new offline model!
</TabItem>
1. Install any OpenAI API compatible local AI model server like [llama-cpp-server](https://github.com/ggml-org/llama.cpp/tree/master/tools/server), Ollama, vLLM etc.
2. Add an [AI model API](http://localhost:42110/server/admin/database/aimodelapi/add/) on the admin panel
   - Set the `api url` field to the URL of your local AI model provider, like `http://localhost:11434/v1/` for Ollama
3. Restart the Khoj server to load the models available on your local AI model provider
   - If that doesn't work, you'll need to manually add the available [chat models](http://localhost:42110/server/admin/database/chatmodel/add) in the admin panel.
4. Set the newly added chat model as your preferred model in your [User chat settings](http://localhost:42110/settings)
5. [Start chatting](http://localhost:42110) with your local AI!
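To sanity-check that your local server really speaks the OpenAI chat API before wiring it into Khoj, you can send it a request directly. A minimal stdlib-only sketch, assuming Ollama's default endpoint and a hypothetical `llama3.1` model already pulled:

```python
import json
import urllib.request

def build_chat_request(base_url: str, model: str, prompt: str) -> urllib.request.Request:
    """Build an OpenAI-compatible chat completion request for a local server."""
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
    }
    return urllib.request.Request(
        url=base_url.rstrip("/") + "/chat/completions",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

if __name__ == "__main__":
    # Requires a running Ollama server with the model pulled; both are assumptions.
    req = build_chat_request("http://localhost:11434/v1/", "llama3.1", "Hello!")
    with urllib.request.urlopen(req) as resp:
        print(json.loads(resp.read())["choices"][0]["message"]["content"])
```

If the request succeeds, the same base URL is what goes in the admin panel's `api url` field.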
</TabItem>
</Tabs>

:::tip[Multiple Chat Models]
1 change: 0 additions & 1 deletion pyproject.toml
@@ -65,7 +65,6 @@ dependencies = [
"django == 5.1.10",
"django-unfold == 0.42.0",
"authlib == 1.2.1",
"llama-cpp-python == 0.2.88",
"itsdangerous == 2.1.2",
"httpx == 0.28.1",
"pgvector == 0.2.4",
75 changes: 21 additions & 54 deletions src/khoj/configure.py
@@ -50,13 +50,11 @@
)
from khoj.database.models import ClientApplication, KhojUser, ProcessLock, Subscription
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
from khoj.routers.api_content import configure_content, configure_search
from khoj.routers.api_content import configure_content
from khoj.routers.twilio import is_twilio_enabled
from khoj.utils import constants, state
from khoj.utils.config import SearchType
from khoj.utils.fs_syncer import collect_files
from khoj.utils.helpers import is_none_or_empty, telemetry_disabled
from khoj.utils.rawconfig import FullConfig
from khoj.utils.helpers import is_none_or_empty

logger = logging.getLogger(__name__)

@@ -232,14 +230,6 @@ async def authenticate(self, request: HTTPConnection):
return AuthCredentials(), UnauthenticatedUser()


def initialize_server(config: Optional[FullConfig]):
try:
configure_server(config, init=True)
except Exception as e:
logger.error(f"🚨 Failed to configure server on app load: {e}", exc_info=True)
raise e


def clean_connections(func):
"""
A decorator that ensures that Django database connections that have become unusable, or are obsolete, are closed
@@ -260,19 +250,7 @@ def func_wrapper(*args, **kwargs):
return func_wrapper


def configure_server(
config: FullConfig,
regenerate: bool = False,
search_type: Optional[SearchType] = None,
init=False,
user: KhojUser = None,
):
# Update Config
if config == None:
logger.info(f"Initializing with default config.")
config = FullConfig()
state.config = config

def initialize_server():
if ConversationAdapters.has_valid_ai_model_api():
ai_model_api = ConversationAdapters.get_ai_model_api()
state.openai_client = openai.OpenAI(api_key=ai_model_api.api_key, base_url=ai_model_api.api_base_url)
@@ -309,43 +287,33 @@ def configure_server(
)

state.SearchType = configure_search_types()
state.search_models = configure_search(state.search_models, state.config.search_type)
setup_default_agent(user)
setup_default_agent()

message = (
"📡 Telemetry disabled"
if telemetry_disabled(state.config.app, state.telemetry_disabled)
else "📡 Telemetry enabled"
)
message = "📡 Telemetry disabled" if state.telemetry_disabled else "📡 Telemetry enabled"
logger.info(message)

if not init:
initialize_content(user, regenerate, search_type)

except Exception as e:
logger.error(f"Failed to load some search models: {e}", exc_info=True)


def setup_default_agent(user: KhojUser):
AgentAdapters.create_default_agent(user)
def setup_default_agent():
AgentAdapters.create_default_agent()


def initialize_content(user: KhojUser, regenerate: bool, search_type: Optional[SearchType] = None):
# Initialize Content from Config
if state.search_models:
try:
logger.info("📬 Updating content index...")
all_files = collect_files(user=user)
status = configure_content(
user,
all_files,
regenerate,
search_type,
)
if not status:
raise RuntimeError("Failed to update content index")
except Exception as e:
raise e
try:
logger.info("📬 Updating content index...")
status = configure_content(
user,
{},
regenerate,
search_type,
)
if not status:
raise RuntimeError("Failed to update content index")
except Exception as e:
raise e


def configure_routes(app):
@@ -438,8 +406,7 @@ async def dispatch(self, request: Request, call_next):

def update_content_index():
for user in get_all_users():
all_files = collect_files(user=user)
success = configure_content(user, all_files)
success = configure_content(user, {})
if not success:
raise RuntimeError("Failed to update content index")
logger.info("📪 Content index updated via Scheduler")
@@ -464,7 +431,7 @@ def configure_search_types():
@schedule.repeat(schedule.every(2).minutes)
@clean_connections
def upload_telemetry():
if telemetry_disabled(state.config.app, state.telemetry_disabled) or not state.telemetry:
if state.telemetry_disabled or not state.telemetry:
return

try:
13 changes: 2 additions & 11 deletions src/khoj/database/adapters/__init__.py
@@ -72,7 +72,6 @@
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.utils import state
from khoj.utils.config import OfflineChatProcessorModel
from khoj.utils.helpers import (
clean_object_for_db,
clean_text_for_db,
@@ -789,8 +788,8 @@ def get_default_agent():
return Agent.objects.filter(name=AgentAdapters.DEFAULT_AGENT_NAME).first()

@staticmethod
def create_default_agent(user: KhojUser):
default_chat_model = ConversationAdapters.get_default_chat_model(user)
def create_default_agent():
default_chat_model = ConversationAdapters.get_default_chat_model(user=None)
if default_chat_model is None:
logger.info("No default conversation config found, skipping default agent creation")
return None
@@ -1553,14 +1552,6 @@ async def aget_valid_chat_model(user: KhojUser, conversation: Conversation, is_s
if chat_model is None:
chat_model = await ConversationAdapters.aget_default_chat_model()

if chat_model.model_type == ChatModel.ModelType.OFFLINE:
if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
chat_model_name = chat_model.name
max_tokens = chat_model.max_prompt_size
state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)

return chat_model

if (
chat_model.model_type
in [
src/khoj/database/migrations/0092_alter_chatmodel_model_type_alter_chatmodel_name_and_more.py
@@ -0,0 +1,36 @@
# Generated by Django 5.1.10 on 2025-07-19 21:33

from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("database", "0091_chatmodel_friendly_name_and_more"),
]

operations = [
migrations.AlterField(
model_name="chatmodel",
name="model_type",
field=models.CharField(
choices=[("openai", "Openai"), ("anthropic", "Anthropic"), ("google", "Google")],
default="google",
max_length=200,
),
),
migrations.AlterField(
model_name="chatmodel",
name="name",
field=models.CharField(default="gemini-2.5-flash", max_length=200),
),
migrations.AlterField(
model_name="speechtotextmodeloptions",
name="model_name",
field=models.CharField(default="whisper-1", max_length=200),
),
migrations.AlterField(
model_name="speechtotextmodeloptions",
name="model_type",
field=models.CharField(choices=[("openai", "Openai")], default="openai", max_length=200),
),
]
@@ -0,0 +1,36 @@
# Generated by Django 5.1.10 on 2025-07-25 23:30

from django.db import migrations


class Migration(migrations.Migration):
dependencies = [
("database", "0092_alter_chatmodel_model_type_alter_chatmodel_name_and_more"),
]

operations = [
migrations.RemoveField(
model_name="localorgconfig",
name="user",
),
migrations.RemoveField(
model_name="localpdfconfig",
name="user",
),
migrations.RemoveField(
model_name="localplaintextconfig",
name="user",
),
migrations.DeleteModel(
name="LocalMarkdownConfig",
),
migrations.DeleteModel(
name="LocalOrgConfig",
),
migrations.DeleteModel(
name="LocalPdfConfig",
),
migrations.DeleteModel(
name="LocalPlaintextConfig",
),
]
38 changes: 4 additions & 34 deletions src/khoj/database/models/__init__.py
@@ -220,16 +220,15 @@ class PriceTier(models.TextChoices):
class ChatModel(DbBaseModel):
class ModelType(models.TextChoices):
OPENAI = "openai"
OFFLINE = "offline"
ANTHROPIC = "anthropic"
GOOGLE = "google"

max_prompt_size = models.IntegerField(default=None, null=True, blank=True)
subscribed_max_prompt_size = models.IntegerField(default=None, null=True, blank=True)
tokenizer = models.CharField(max_length=200, default=None, null=True, blank=True)
name = models.CharField(max_length=200, default="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF")
name = models.CharField(max_length=200, default="gemini-2.5-flash")
friendly_name = models.CharField(max_length=200, default=None, null=True, blank=True)
model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OFFLINE)
model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.GOOGLE)
price_tier = models.CharField(max_length=20, choices=PriceTier.choices, default=PriceTier.FREE)
vision_enabled = models.BooleanField(default=False)
ai_model_api = models.ForeignKey(AiModelApi, on_delete=models.CASCADE, default=None, null=True, blank=True)
@@ -489,34 +488,6 @@ def save(self, *args, **kwargs):
super().save(*args, **kwargs)


class LocalOrgConfig(DbBaseModel):
input_files = models.JSONField(default=list, null=True)
input_filter = models.JSONField(default=list, null=True)
index_heading_entries = models.BooleanField(default=False)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class LocalMarkdownConfig(DbBaseModel):
input_files = models.JSONField(default=list, null=True)
input_filter = models.JSONField(default=list, null=True)
index_heading_entries = models.BooleanField(default=False)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class LocalPdfConfig(DbBaseModel):
input_files = models.JSONField(default=list, null=True)
input_filter = models.JSONField(default=list, null=True)
index_heading_entries = models.BooleanField(default=False)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class LocalPlaintextConfig(DbBaseModel):
input_files = models.JSONField(default=list, null=True)
input_filter = models.JSONField(default=list, null=True)
index_heading_entries = models.BooleanField(default=False)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class SearchModelConfig(DbBaseModel):
class ModelType(models.TextChoices):
TEXT = "text"
@@ -605,11 +576,10 @@ def __str__(self):
class SpeechToTextModelOptions(DbBaseModel):
class ModelType(models.TextChoices):
OPENAI = "openai"
OFFLINE = "offline"

model_name = models.CharField(max_length=200, default="base")
model_name = models.CharField(max_length=200, default="whisper-1")
friendly_name = models.CharField(max_length=200, default=None, null=True, blank=True)
model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OFFLINE)
model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OPENAI)
price_tier = models.CharField(max_length=20, choices=PriceTier.choices, default=PriceTier.FREE)
ai_model_api = models.ForeignKey(AiModelApi, on_delete=models.CASCADE, default=None, null=True, blank=True)
