
Commit 4eb238f

Correct backend
1 parent 7ff473e commit 4eb238f

7 files changed: +59 -51 lines changed


backend/Dockerfile

Lines changed: 5 additions & 5 deletions
@@ -1,14 +1,14 @@
-# Utiliza la imagen base de Python 3.8
-FROM python:3.8
+# Use an official Python runtime as a parent image
+FROM python:3.10-slim
 
-# Establece el directorio de trabajo
+# Set the working directory in the container
 WORKDIR /app
 
-# Copia los requerimientos y los instala
+# Copy the requirements file into the container at /app and install dependencies
 COPY ./requirements.txt /app/requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copia el código del backend
+# Copy backend code into the container at /app
 COPY ./app /app
 
 # Expone el puerto 8000

backend/app/docs.py

Lines changed: 7 additions & 1 deletion
@@ -1,2 +1,8 @@
-desc = 'LLM Chat Service by Adrián Baeza Prieto'
+'''
+##################### FastAPI + TinyLlama + Docker #########################################
+Autor: Adrián Baeza Prieto
+Github: @adribaeza
+Python 3.10+
+'''
+desc = 'LLM Chat Service with TinyLlama by Adrián Baeza Prieto'
 version="1.0.0"

backend/app/main.py

Lines changed: 47 additions & 45 deletions
@@ -1,24 +1,24 @@
 '''
-##################### TinyLlama + FastAPI + Docker #########################################
-Autor: Santiago Gonzalez Acevedo
-Twitter: @locoalien
-Python 3.11+
+##################### FastAPI + TinyLlama + Docker #########################################
+Autor: Adrián Baeza Prieto
+Github: @adribaeza
+Python 3.10+
 '''
-#https://medium.com/@santiagosk80/tinyllama-fastapi-docker-microservicios-llm-ff99eb999f04
 import logging
 import os
 import torch
 from fastapi import FastAPI, HTTPException
 from transformers import pipeline
-import docs #Libreria con informacion de la API en Swagger
-from starlette.middleware.cors import CORSMiddleware #Seguridad a nivel de CORS
+import docs #Import the documentation
+from starlette.middleware.cors import CORSMiddleware #Import the middleware
 import json
 
+#instance logger
 logger = logging.getLogger(__name__)
-# Crea una instancia de FastAPI
-app = FastAPI(title='LLM Chat Service', description=docs.desc, version=docs.version)
-# CORS Configuration (in-case you want to deploy)
-app.add_middleware(
+# Instance FastAPI
+api = FastAPI(title='LLM Chat Service with TinyLLama', description=docs.desc, version=docs.version)
+# Define configuration for the API
+api.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_credentials=True,
@@ -27,45 +27,47 @@
 )
 logger.info('Adding v1 endpoints..')
 
-# Carga el modelo y el tokenizador
+# Load the model with the TinyLlama model
 pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")
 
-# Necesito un enpoint "/chat" que reciba un texto, lo pase por el modelo y devuelva la respuesta
-@app.post("/chat")
+# Declare the endpoint for the chat service
+@api.post("/chat")
 async def chat(text: str):
     try:
-        #Configuracion de comportamiento del modelo
-        messages = [
-            {
-                "role": "system",
-                "content": "Solo quiero la respuesta a la pregunta sin repetir la pregunta, por favor.",
-            },
-            {"role": "user", "content": f"{text}"},
-        ]
-        #Obtener prompt para el modelo
-        prompt = pipe.tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
-        #Configuracion de exactitud del modelo
-        outputs = pipe(
-            prompt,
-            max_new_tokens=256,
-            do_sample=True,
-            temperature=0.3,
-            top_k=50,
-            top_p=0.95,
-        )
-        #Resultado del modelo
-        output = outputs[0]["generated_text"]
-        # Extraer la parte de la respuesta a partir de "<|assistant|>"
-        assistant_response = output.split("<|assistant|>")[-1].strip()
-        json_results = json_results = json.dumps({"response": assistant_response}, ensure_ascii=False, indent=4).encode('utf8')
-        return json.loads(json_results)
+        #Define the messages to send to the model
+        messages = [
+            {
+                "role": "system",
+                "content": "Solo quiero la respuesta a la pregunta sin repetir la pregunta, por favor.",
+            },
+            {
+                "role": "user",
+                "content": f"{text}"
+            },
+        ]
+        #Get the prompt from the tokenizer
+        prompt = pipe.tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        #Model configuration
+        outputs = pipe(
+            prompt,
+            max_new_tokens=256,
+            do_sample=True,
+            temperature=0.3,
+            top_k=50,
+            top_p=0.95,
+        )
+        #Get the output from the model
+        output = outputs[0]["generated_text"]
+        assistant_response = output.split("<|assistant|>")[-1].strip()
+        json_results = json_results = json.dumps({"response": assistant_response}, ensure_ascii=False, indent=4).encode('utf8')
+        return json.loads(json_results)
     except Exception as e:
         logger.error(f'Error: {e}')
         raise HTTPException(status_code=500, detail='Internal Server Error')
 
-# Ejecutar el servidor con uvicorn
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app)
+# Execute the API with Uvicorn only if the script is executed directly in the local environment
+#if __name__ == '__main__':
+#    import uvicorn
+#    uvicorn.run(api)
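
For reference, a minimal client sketch for the new /chat endpoint (an illustration, not part of the commit). It assumes the service is published on localhost:8000, the port referenced in the Dockerfile, and relies on FastAPI exposing the text: str argument of chat() as a query parameter:

# Hypothetical client for the /chat endpoint; the host and port are assumptions.
import requests

resp = requests.post(
    "http://localhost:8000/chat",         # assumed local address of the container
    params={"text": "What is FastAPI?"},  # FastAPI maps chat(text: str) to a query parameter
    timeout=120,                          # generation with TinyLlama can be slow on CPU
)
resp.raise_for_status()
print(resp.json()["response"])            # body built with json.dumps({"response": ...}) in main.py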
4 files renamed without changes.

0 commit comments
