'''
- ##################### TinyLlama + FastAPI + Docker #########################################
- Author: Santiago Gonzalez Acevedo
- Twitter: @locoalien
- Python 3.11+
+ ##################### FastAPI + TinyLlama + Docker #########################################
+ Author: Adrián Baeza Prieto
+ Github: @adribaeza
+ Python 3.10+
'''
- #https://medium.com/@santiagosk80/tinyllama-fastapi-docker-microservicios-llm-ff99eb999f04
import logging
import os
import torch
from fastapi import FastAPI, HTTPException
from transformers import pipeline
- import docs  # Library with the API information shown in Swagger
- from starlette.middleware.cors import CORSMiddleware  # CORS-level security
+ import docs  # Import the API documentation module
+ from starlette.middleware.cors import CORSMiddleware  # Import the CORS middleware
import json

+ # Create the logger instance
logger = logging.getLogger(__name__)
- # Create a FastAPI instance
- app = FastAPI(title='LLM Chat Service', description=docs.desc, version=docs.version)
- # CORS configuration (in case you want to deploy)
- app.add_middleware(
+ # Create the FastAPI instance
+ api = FastAPI(title='LLM Chat Service with TinyLlama', description=docs.desc, version=docs.version)
+ # Configure CORS for the API
+ api.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
)
logger.info('Adding v1 endpoints..')

- # Load the model and the tokenizer
+ # Load the text-generation pipeline with the TinyLlama model
pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")

- # We need a "/chat" endpoint that receives a text, runs it through the model, and returns the answer
- @app.post("/chat")
+ # Declare the endpoint for the chat service
+ @api.post("/chat")
async def chat(text: str):
    try:
-         # Configure the model's behaviour
-         messages = [
-             {
-                 "role": "system",
-                 "content": "Solo quiero la respuesta a la pregunta sin repetir la pregunta, por favor.",
-             },
-             {"role": "user", "content": f"{text}"},
-         ]
-         # Get the prompt for the model
-         prompt = pipe.tokenizer.apply_chat_template(
-             messages, tokenize=False, add_generation_prompt=True
-         )
-         # Model accuracy (generation) settings
-         outputs = pipe(
-             prompt,
-             max_new_tokens=256,
-             do_sample=True,
-             temperature=0.3,
-             top_k=50,
-             top_p=0.95,
-         )
-         # Model result
-         output = outputs[0]["generated_text"]
-         # Extract the answer part starting at "<|assistant|>"
-         assistant_response = output.split("<|assistant|>")[-1].strip()
-         json_results = json.dumps({"response": assistant_response}, ensure_ascii=False, indent=4).encode('utf8')
-         return json.loads(json_results)
+         # Define the messages to send to the model (the system prompt, in Spanish, asks for only the answer, without repeating the question)
+         messages = [
+             {
+                 "role": "system",
+                 "content": "Solo quiero la respuesta a la pregunta sin repetir la pregunta, por favor.",
+             },
+             {
+                 "role": "user",
+                 "content": f"{text}"
+             },
+         ]
+         # Build the prompt with the tokenizer's chat template
+         prompt = pipe.tokenizer.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         # Generation settings
+         outputs = pipe(
+             prompt,
+             max_new_tokens=256,
+             do_sample=True,
+             temperature=0.3,
+             top_k=50,
+             top_p=0.95,
+         )
+         # Get the output from the model and keep only the text after the "<|assistant|>" marker
+         output = outputs[0]["generated_text"]
+         assistant_response = output.split("<|assistant|>")[-1].strip()
+         json_results = json.dumps({"response": assistant_response}, ensure_ascii=False, indent=4).encode('utf8')
+         return json.loads(json_results)
    except Exception as e:
        logger.error(f'Error: {e}')
        raise HTTPException(status_code=500, detail='Internal Server Error')

- # Run the server with uvicorn
- if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app)
+ # Run the API with Uvicorn only if the script is executed directly in the local environment
+ # if __name__ == '__main__':
+ #     import uvicorn
+ #     uvicorn.run(api)
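
For reference, because `chat(text: str)` declares a plain `str` parameter, FastAPI exposes `text` as a query parameter on the POST request rather than as a JSON body. Below is a minimal client sketch, not part of this commit; the base URL/port and the `main` module name are assumptions (e.g. after running `uvicorn main:api --port 8000` with the file saved as main.py):

    # Hypothetical client call; assumes the service listens on http://localhost:8000
    import requests

    resp = requests.post(
        "http://localhost:8000/chat",
        params={"text": "What is the capital of France?"},  # `text` is a query parameter, not a JSON body
    )
    resp.raise_for_status()
    print(resp.json()["response"])  # the text the endpoint extracted after "<|assistant|>"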