- from fastapi import FastAPI
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ '''
+ ##################### TinyLlama + FastAPI + Docker #########################################
+ Author: Santiago Gonzalez Acevedo
+ Twitter: @locoalien
+ Python 3.11+
+ '''
+ # https://medium.com/@santiagosk80/tinyllama-fastapi-docker-microservicios-llm-ff99eb999f04
+ import logging
+ import os
+ import torch
+ from fastapi import FastAPI, HTTPException
+ from transformers import pipeline
+ import docs  # Local module with the API description shown in Swagger
+ from starlette.middleware.cors import CORSMiddleware  # CORS-level security
+ import json
- app = FastAPI()
+ logger = logging.getLogger(__name__)
+ # Create a FastAPI instance
+ app = FastAPI(title='LLM Chat Service', description=docs.desc, version=docs.version)
+ # CORS configuration (in case you want to deploy)
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["GET", "POST", "OPTIONS"],
+     allow_headers=["*"],
+ )
+ logger.info('Adding v1 endpoints..')
- torch.random.manual_seed(0)
- model = AutoModelForCausalLM.from_pretrained(
-     "microsoft/Phi-3-mini-4k-instruct",
-     device_map="cuda",
-     torch_dtype="auto",
-     trust_remote_code=True,
- )
+ # Load the model and tokenizer
+ pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")
- tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
-
- messages = [
-     {"role": "system", "content": "You are a helpful AI assistant."},
-     {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
-     {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
-     {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
- ]
-
- pipe = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
- )
-
- generation_args = {
-     "max_new_tokens": 500,
-     "return_full_text": False,
-     "temperature": 0.0,
-     "do_sample": False,
- }
-
- @app.post("/predict")
- async def predict(text: str):
-     output = pipe(messages, **generation_args)
-     return {"prediction": output[0]['generated_text']}
+ # "/chat" endpoint: receives a text, passes it through the model, and returns the response
+ @app.post("/chat")
+ async def chat(text: str):
+     try:
+         # Model behavior configuration
+         messages = [
+             {
+                 "role": "system",
+                 "content": "Solo quiero la respuesta a la pregunta sin repetir la pregunta, por favor.",
+             },
+             {"role": "user", "content": f"{text}"},
+         ]
+         # Build the prompt for the model
+         prompt = pipe.tokenizer.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         # Generation (sampling) settings
+         outputs = pipe(
+             prompt,
+             max_new_tokens=256,
+             do_sample=True,
+             temperature=0.3,
+             top_k=50,
+             top_p=0.95,
+         )
+         # Model output
+         output = outputs[0]["generated_text"]
+         # Extract the answer portion after the "<|assistant|>" tag
+         assistant_response = output.split("<|assistant|>")[-1].strip()
+         json_results = json.dumps({"response": assistant_response}, ensure_ascii=False, indent=4).encode('utf8')
+         return json.loads(json_results)
+     except Exception as e:
+         logger.error(f'Error: {e}')
+         raise HTTPException(status_code=500, detail='Internal Server Error')
+
+ # Run the server with uvicorn
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app)
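
The service imports a local `docs` module and reads `docs.desc` and `docs.version` for the Swagger metadata, but that file is not part of this diff. A minimal sketch of what it could contain, assuming only the two attributes the code above actually reads (the wording and version number are placeholders):

# docs.py -- hypothetical companion module; the app only reads `desc` and `version`
desc = "Microservice that exposes the TinyLlama-1.1B-Chat model through a /chat endpoint."
version = "1.0.0"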
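Because `chat(text: str)` declares a plain string parameter, FastAPI treats `text` as a query parameter, so clients call POST /chat?text=.... A quick way to exercise the endpoint without starting uvicorn is FastAPI's TestClient; this sketch assumes the file above is saved as main.py (the module name is an assumption):

# Hypothetical smoke test for the /chat endpoint; assumes the code above lives in main.py.
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)

# `text` is sent as a query parameter because the endpoint declares `text: str`.
response = client.post("/chat", params={"text": "What is the capital of France?"})
print(response.status_code)  # 200 on success
print(response.json())       # {"response": "..."} generated by TinyLlama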