Commit c5bf2e8

Add deploy to github container registry
1 parent e870bcd commit c5bf2e8

File tree

8 files changed: 132 additions, 40 deletions
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
name: Build and Deploy Docker Images

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push backend Docker image
        uses: docker/build-push-action@v2
        with:
          context: ./backend
          file: ./backend/Dockerfile
          push: true
          tags: ghcr.io/${{ github.repository_owner }}/llm-tinyllama-backend:latest

      - name: Build and push frontend Docker image
        uses: docker/build-push-action@v2
        with:
          context: ./frontend
          file: ./frontend/Dockerfile
          push: true
          tags: ghcr.io/${{ github.repository_owner }}/llm-tinyllama-frontend:latest
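Once this workflow has run, both images are published under the owner's namespace on ghcr.io. A minimal local pull-and-run sketch using the Docker SDK for Python (assumptions: the `docker` package is installed, the ghcr.io package is public or a `docker login ghcr.io` was done beforehand, and OWNER is a placeholder for the repository owner):

# Sketch only: pull the backend image published by the workflow and run it locally.
# Assumptions: `pip install docker`, access to the ghcr.io package, OWNER is a placeholder.
import docker

client = docker.from_env()

image = client.images.pull("ghcr.io/OWNER/llm-tinyllama-backend", tag="latest")

# Map the FastAPI port exposed by backend/Dockerfile (8000) to the host.
container = client.containers.run(
    image,
    detach=True,
    ports={"8000/tcp": 8000},
    name="llm-backend-smoketest",
)
print(container.short_id, container.status)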

AI DevOps Engineer Challenge.pdf

-61.9 KB
Binary file not shown.

backend/Dockerfile

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
# Use the Python 3.8 base image
FROM python:3.8

# Set the working directory
WORKDIR /app

# Copy the requirements and install them
COPY ./backend/requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the backend code
COPY ./backend /app

# Expose port 8000
EXPOSE 8000

# Command to run the FastAPI server with Uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
267 Bytes
Binary file not shown.

backend/app/docs.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
desc = 'LLM Chat Service by Adrián Baeza Prieto'
version = "1.0.0"

backend/app/main.py

Lines changed: 68 additions & 37 deletions
@@ -1,40 +1,71 @@
-from fastapi import FastAPI
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+'''
+##################### TinyLlama + FastAPI + Docker #########################################
+Author: Santiago Gonzalez Acevedo
+Twitter: @locoalien
+Python 3.11+
+'''
+# https://medium.com/@santiagosk80/tinyllama-fastapi-docker-microservicios-llm-ff99eb999f04
+import logging
+import os
+import torch
+from fastapi import FastAPI, HTTPException
+from transformers import pipeline
+import docs  # Module with the API description shown in Swagger
+from starlette.middleware.cors import CORSMiddleware  # CORS-level security
+import json

-app = FastAPI()
+logger = logging.getLogger(__name__)
+# Create a FastAPI instance
+app = FastAPI(title='LLM Chat Service', description=docs.desc, version=docs.version)
+# CORS configuration (in case you want to deploy)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["GET", "POST", "OPTIONS"],
+    allow_headers=["*"],
+)
+logger.info('Adding v1 endpoints..')

-torch.random.manual_seed(0)
-model = AutoModelForCausalLM.from_pretrained(
-    "microsoft/Phi-3-mini-4k-instruct",
-    device_map="cuda",
-    torch_dtype="auto",
-    trust_remote_code=True,
-)
+# Load the model and the tokenizer
+pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")

-tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
-
-messages = [
-    {"role": "system", "content": "You are a helpful AI assistant."},
-    {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
-    {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
-    {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
-]
-
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-)
-
-generation_args = {
-    "max_new_tokens": 500,
-    "return_full_text": False,
-    "temperature": 0.0,
-    "do_sample": False,
-}
-
-@app.post("/predict")
-async def predict(text: str):
-    output = pipe(messages, **generation_args)
-    return {"prediction": output[0]['generated_text']}
+# A "/chat" endpoint that receives a text, runs it through the model and returns the answer
+@app.post("/chat")
+async def chat(text: str):
+    try:
+        # Behaviour configuration for the model
+        messages = [
+            {
+                "role": "system",
+                "content": "Solo quiero la respuesta a la pregunta sin repetir la pregunta, por favor.",
+            },
+            {"role": "user", "content": f"{text}"},
+        ]
+        # Build the prompt for the model
+        prompt = pipe.tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        # Sampling configuration for the model
+        outputs = pipe(
+            prompt,
+            max_new_tokens=256,
+            do_sample=True,
+            temperature=0.3,
+            top_k=50,
+            top_p=0.95,
+        )
+        # Model output
+        output = outputs[0]["generated_text"]
+        # Extract the answer portion that follows "<|assistant|>"
+        assistant_response = output.split("<|assistant|>")[-1].strip()
+        json_results = json.dumps({"response": assistant_response}, ensure_ascii=False, indent=4).encode('utf8')
+        return json.loads(json_results)
+    except Exception as e:
+        logger.error(f'Error: {e}')
+        raise HTTPException(status_code=500, detail='Internal Server Error')
+
+# Run the server with uvicorn
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app)
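Note that `text` is declared as a bare `str` parameter, so FastAPI reads it from the query string rather than the request body. A minimal client sketch (assumptions: the service is reachable on localhost:8000 and the `requests` package is installed):

# Sketch only: call the /chat endpoint above.
# Assumptions: the app is listening on localhost:8000 and `requests` is installed.
import requests

resp = requests.post(
    "http://localhost:8000/chat",
    params={"text": "What is the capital of France?"},  # query parameter, not a JSON body
    timeout=120,  # the first call can be slow while the model loads
)
resp.raise_for_status()
print(resp.json()["response"])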

backend/requirements.txt

Lines changed: 4 additions & 3 deletions
@@ -1,6 +1,7 @@
 fastapi==0.111.1
 uvicorn==0.30.3
-flash_attn==2.5.8
 torch==2.3.1
-accelerate==0.31.0
-transformers==4.41.2
+transformers==4.41.2
+pydantic==2.8.2
+huggingface_hub
+accelerate

cositas.docx

-12 KB
Binary file not shown.

0 commit comments
