Skip to content

feat: add locust and grafana for benchmarking #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed dist/solo_server-0.2.6-py3-none-any.whl
Binary file not shown.
Binary file removed dist/solo_server-0.2.6.tar.gz
Binary file not shown.
212 changes: 145 additions & 67 deletions solo_server/base.py
Original file line number Diff line number Diff line change
@@ -1,117 +1,195 @@
import typer
from subprocess import run, CalledProcessError
from subprocess import run, CalledProcessError, DEVNULL
import os
import sys
import time
import requests
import subprocess

app = typer.Typer(help="🛠️ Solo Server CLI for managing edge AI model inference using Docker-style commands.")

def execute_command(command: list):
    """Run *command* as a subprocess, aborting the CLI with exit code 1 on failure.

    Args:
        command: The executable and its arguments as a list of strings.
    """
    failure = None
    try:
        run(command, check=True)
    except CalledProcessError as exc:
        failure = exc
    if failure is not None:
        typer.echo(f"❌ Error: {failure}")
        raise typer.Exit(code=1)

# Recurring prompt to ask for the next command
@app.command()
def prompt():
    """
    🔄 Recurring prompt for managing the Solo Server.
    """
    menu_lines = (
        "1. 🚀 Start the Solo Server",
        "2. ⏹ Stop the Solo Server",
        "3. 📈 Check the Solo Server status",
        "4. 🖌️ Generate a code base template",
        "5. ❌ Exit",
    )
    while True:
        typer.echo("\nWhat would you like to do?")
        for line in menu_lines:
            typer.echo(line)
        choice = typer.prompt("Enter the number of your choice")

        if choice == "1":
            tag = typer.prompt("Enter the tag name to start the server with")
            start(tag)
        elif choice == "2":
            stop()
        elif choice == "3":
            status()
        elif choice == "4":
            tag = typer.prompt("Enter the tag name for the code base template")
            gen(tag)
        elif choice == "5":
            typer.echo("❌ Exiting the Solo Server CLI. Goodbye!")
            break
        else:
            typer.echo("⚠️ Invalid choice. Please try again.")
def check_docker_installation():
    """Ensure Docker and Docker Compose are installed and accessible.

    Attempts an automatic install when a tool is missing entirely; exits the
    process with code 1 when a tool is present but the current user cannot
    run it (e.g. missing docker group membership).
    """
    typer.echo("🔍 Checking Docker and Docker Compose installation...")

    # Check Docker
    try:
        run(["docker", "--version"], stdout=DEVNULL, stderr=DEVNULL, check=True)
    except FileNotFoundError:
        typer.echo("❌ Docker is not installed. Installing Docker...")
        # A "|" in a list-form command is passed to curl as a literal
        # argument — the pipe must run inside an actual shell.
        execute_command(["sh", "-c", "curl -fsSL https://get.docker.com | sh"])
    except CalledProcessError:
        typer.echo("❌ Docker is installed but not accessible. Please ensure you have the correct permissions.")
        typer.echo("🔑 Run the following to add your user to the Docker group:")
        typer.echo("    sudo usermod -aG docker $USER && newgrp docker")
        sys.exit(1)

    # Check Docker Compose
    try:
        run(["docker-compose", "--version"], stdout=DEVNULL, stderr=DEVNULL, check=True)
    except FileNotFoundError:
        typer.echo("❌ Docker Compose is not installed. Installing Docker Compose...")
        # "$(uname -s)"/"$(uname -m)" are shell substitutions and are NOT
        # expanded by subprocess, so run the download through a shell.
        execute_command([
            "sh", "-c",
            "curl -L https://github.com/docker/compose/releases/latest/download/"
            "docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose",
        ])
        execute_command(["chmod", "+x", "/usr/local/bin/docker-compose"])
    except CalledProcessError:
        typer.echo("❌ Docker Compose is installed but not accessible.")
        sys.exit(1)

    typer.echo("✅ Docker and Docker Compose are installed and accessible.")

@app.command()
def start(
    tag: str,
    model_url: str = typer.Option(
        None,
        "--model-url", "-u",
        help="URL for the LLM model (only used with llm tag)"
    ),
    model_filename: str = typer.Option(
        None,
        "--model-filename", "-f",
        help="Filename for the LLM model (only used with llm tag)"
    )
):
    """
    🚀 Start the Solo Server for model inference.
    """
    check_docker_installation()
    typer.echo(f"🚀 Starting the Solo Server with tag: {tag}...")

    if tag == "llm":
        # Defaults point at a small instruct llamafile so `start llm` works
        # out of the box without any flags.
        default_url = "https://huggingface.co/Mozilla/Llama-3.2-1B-Instruct-llamafile/resolve/main/Llama-3.2-1B-Instruct.Q6_K.llamafile"
        default_filename = "Llama-3.2-1B-Instruct.Q6_K.llamafile"

        # Use provided values or defaults; docker-compose reads these from
        # the environment.
        os.environ["MODEL_URL"] = model_url or default_url
        os.environ["MODEL_FILENAME"] = model_filename or default_filename
    elif model_url or model_filename:
        # Reaching this elif already implies tag != "llm".
        typer.echo("⚠️ Warning: model-url and model-filename are only used with the llm tag")

    python_file = f"templates/{tag}.py"
    os.environ["PYTHON_FILE"] = python_file

    # Resolve docker-compose.yml relative to this file so the command works
    # from any working directory.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    docker_compose_path = os.path.join(current_dir, "docker-compose.yml")
    # Detached mode: return control to the CLI once containers are up.
    execute_command(["docker-compose", "-f", docker_compose_path, "up", "-d"])

# Command to stop the Solo Server
@app.command()
def stop():
    """
    ⏹ Stop the running Solo Server.
    """
    check_docker_installation()
    typer.echo("⏹ Stopping the Solo Server...")
    # The compose file lives next to this module, not in the caller's CWD.
    compose_file = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "docker-compose.yml"
    )
    execute_command(["docker-compose", "-f", compose_file, "down"])

# Command to check the status of the Solo Server
@app.command()
def status():
    """
    📈 Check the status of the Solo Server.
    """
    check_docker_installation()
    typer.echo("📈 Checking Solo Server status...")
    # The compose file lives next to this module, not in the caller's CWD.
    compose_file = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "docker-compose.yml"
    )
    execute_command(["docker-compose", "-f", compose_file, "ps"])

# Command to benchmark the Solo Server with TimescaleDB, Grafana and Locust
@app.command()
def benchmark(
    model_url: str = typer.Option(..., help="URL of the model to benchmark"),
    model_filename: str = typer.Option(..., help="Filename for the downloaded model"),
    template: str = typer.Option("llm", help="Template to use for benchmarking")
):
    """
    🏎️ Run a benchmark test on the Solo Server with TimescaleDB and Grafana integration.
    """
    check_docker_installation()

    # First start the Solo Server with the specified template.
    typer.echo(f"🚀 Starting the Solo Server with template: {template}...")
    os.environ["PYTHON_FILE"] = f"templates/{template}.py"
    os.environ["MODEL_URL"] = model_url
    os.environ["MODEL_FILENAME"] = model_filename

    # Start the main server (compose file lives next to this module).
    current_dir = os.path.dirname(os.path.abspath(__file__))
    docker_compose_path = os.path.join(current_dir, "docker-compose.yml")
    execute_command(["docker-compose", "-f", docker_compose_path, "up", "-d"])

    # Poll the container's Docker healthcheck until it reports healthy.
    typer.echo("⏳ Waiting for LLM server to be ready...")
    start_time = time.time()
    timeout = 300  # 5 minutes timeout

    while True:
        if time.time() - start_time > timeout:
            typer.echo("❌ LLM server startup timed out")
            execute_command(["docker-compose", "-f", docker_compose_path, "down"])
            return

        result = subprocess.run(
            ["docker", "inspect", "--format", "{{.State.Health.Status}}", "solo-api"],
            capture_output=True,
            text=True,
        )
        # Named `health` (not `status`) so the `status` CLI command defined
        # above is not shadowed inside this function.
        health = result.stdout.strip()

        if health == "healthy":
            typer.echo("✅ LLM server is ready!")
            break
        elif health == "unhealthy":
            # Print the container logs to help debug startup failures.
            typer.echo("Checking container logs:")
            subprocess.run(["docker", "logs", "solo-api"])
            typer.echo("❌ LLM server failed to start")
            execute_command(["docker-compose", "-f", docker_compose_path, "down"])
            return

        typer.echo("⏳ Waiting for LLM server to initialize... (Status: " + health + ")")
        time.sleep(5)

    # Now start the benchmark tools (metrics store, dashboards, load gen).
    typer.echo("🏎️ Starting benchmark tools...")
    benchmark_compose_path = os.path.join(current_dir, "docker-compose-benchmark.yml")
    execute_command(["docker-compose", "-f", benchmark_compose_path, "up", "-d", "timescale", "grafana", "locust"])

    try:
        # Wait for Grafana to be ready before hitting its HTTP API.
        typer.echo("⏳ Waiting for Grafana to be ready...")
        time.sleep(10)

        # Configure the Grafana datasource via the bundled shell script.
        typer.echo("🔧 Configuring Grafana...")
        grafana_setup_path = os.path.join(current_dir, "grafana_setup.sh")
        os.chmod(grafana_setup_path, 0o755)  # packaging may drop the +x bit
        execute_command([grafana_setup_path])

        typer.echo("✅ Benchmark environment is ready!")
        typer.echo("📊 Visit:")
        typer.echo("   - Grafana: http://localhost:3000 (admin/admin)")
        typer.echo("   - Locust: http://localhost:8089")

        # Keep the CLI process alive so Ctrl+C tears the whole stack down.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        typer.echo("\n⏹ Stopping all services...")
    finally:
        # Stop both compose stacks no matter how we exit this block.
        execute_command(["docker-compose", "-f", docker_compose_path, "down"])
        execute_command(["docker-compose", "-f", benchmark_compose_path, "down"])

@app.command()
def gui():
    """
    🖥️ Launch the Streamlit GUI for Solo Server.
    """
    typer.echo("🖥️ Launching Streamlit app...")

    # Run Streamlit against the bundled LLM chat template.
    streamlit_command = [
        "streamlit",
        "run",
        "templates/streamlit_llm.py"
    ]

    try:
        # execute_command returns None; the previous `print(...)` around it
        # only emitted a stray "None" line.
        execute_command(streamlit_command)
    except Exception as e:
        typer.echo(f"❌ Failed to launch Streamlit app: {e}")
    else:
        typer.echo("✅ Streamlit app launched successfully.")

if __name__ == "__main__":
    # Run the Typer application when this module is executed directly.
    app()
49 changes: 49 additions & 0 deletions solo_server/docker-compose-benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
version: '3.7'

services:
  # TimescaleDB (Postgres 14 based) stores Locust benchmark metrics.
  timescale:
    image: timescale/timescaledb:latest-pg14
    container_name: timescale_postgres
    environment:
      POSTGRES_PASSWORD: password
      POSTGRES_DB: locust
    ports:
      # Host port 5433 avoids clashing with a locally-installed Postgres.
      - "5433:5432"
    volumes:
      - timescale_postgres_data:/var/lib/postgresql/data

  # Grafana dashboards over the TimescaleDB metrics (see grafana_setup.sh).
  grafana:
    image: grafana/grafana:latest
    container_name: timescale_grafana
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    depends_on:
      - timescale
    volumes:
      - grafana_data:/var/lib/grafana

  # Locust load generator driving the Solo Server running on the host.
  locust:
    image: locustio/locust:latest
    container_name: locust_benchmark
    volumes:
      - ./locustfile.py:/home/locust/locustfile.py
    command: >
      -f /home/locust/locustfile.py
      --host http://host.docker.internal:8000
      --users 10
      --spawn-rate 2
      --run-time 1m
    ports:
      - "8089:8089"
    extra_hosts:
      # Makes host.docker.internal resolve on Linux engines too.
      - "host.docker.internal:host-gateway"

volumes:
  timescale_postgres_data:
  grafana_data:

# NOTE(review): this network is declared but no service above joins it —
# confirm whether services should list `networks: [solo-network]` or whether
# the declaration can be removed.
networks:
  solo-network:
    driver: bridge
10 changes: 10 additions & 0 deletions solo_server/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
version: '3.7'

services:
solo-api:
build:
Expand All @@ -7,10 +9,18 @@ services:
container_name: "solo-api"
ports:
- "8000:8000"
- "8080:8080"
environment:
- PYTHON_FILE=${PYTHON_FILE:-solo_server/templates/basic.py}
- MODEL_URL=${MODEL_URL:-your_model_url_here}
- MODEL_FILENAME=${MODEL_FILENAME:-your_model_filename_here}
- LITSERVE_TIMEOUT=120
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/completion", "-H", "Content-Type: application/json", "-d", '{"prompt":"test","n_predict":1}']
interval: 10s
timeout: 30s
retries: 10
start_period: 120s

networks:
solo-network:
Expand Down
20 changes: 20 additions & 0 deletions solo_server/grafana_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
#
# Register a TimescaleDB datasource with the local Grafana instance so that
# Locust benchmark metrics stored in Postgres can be dashboarded.
#
# Exits non-zero when Grafana is unreachable or rejects the request, so the
# calling benchmark command can surface setup failures.
set -euo pipefail

GRAFANA_URL="http://localhost:3000"
ADMIN_PASSWORD="admin"          # matches GF_SECURITY_ADMIN_PASSWORD in docker-compose-benchmark.yml
DATASOURCE_NAME="TimescaleDB"

# Add a new TimescaleDB datasource.
# -f: fail (non-zero exit) on HTTP 4xx/5xx instead of printing the error body
#     and exiting 0, which previously hid misconfiguration from the caller.
curl -fsS -X POST -H "Content-Type: application/json" \
    -u "admin:$ADMIN_PASSWORD" \
    -d '{
        "name": "'"$DATASOURCE_NAME"'",
        "type": "postgres",
        "url": "timescale:5432",
        "access": "proxy",
        "database": "locust",
        "user": "postgres",
        "password": "password",
        "isDefault": true
    }' \
    "$GRAFANA_URL/api/datasources"
33 changes: 33 additions & 0 deletions solo_server/locustfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from locust import HttpUser, task, between
import json

class SoloServerUser(HttpUser):
    """Locust user that load-tests the Solo Server prediction endpoint."""

    # Simulated users pause 1-2 seconds between tasks.
    wait_time = between(1, 2)

    @task
    def test_llm(self):
        """Test LLM completions endpoint."""
        headers = {
            "Content-Type": "application/json"
        }

        payload = {
            "prompt": "What is AI?",
            "n_predict": 128
        }

        with self.client.post(
            "/predict",
            json=payload,
            headers=headers,
            catch_response=True
        ) as response:
            try:
                if response.status_code == 200:
                    # Decode the body so a malformed JSON response is counted
                    # as a failure; previously nothing in this try block ever
                    # parsed JSON, making the JSONDecodeError handler dead.
                    response.json()
                    response.success()
                else:
                    response.failure(f"Failed with status code: {response.status_code}")
            except json.JSONDecodeError:
                response.failure("Response could not be decoded as JSON")
            except Exception as e:
                response.failure(f"Error: {str(e)}")
Loading