Skip to content

Commit 07f664e

Browse files
committed
1 parent 196eecc commit 07f664e

File tree

9 files changed

+890
-6
lines changed

9 files changed

+890
-6
lines changed
Lines changed: 36 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,45 @@
11
FROM python:3.10-slim
22

3-
# update packages, install git and remove cache
4-
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
5-
63
WORKDIR /app
74

5+
# Install system dependencies including Git, bash, and PDF conversion tools
6+
RUN apt-get update && apt-get install -y --no-install-recommends \
7+
git \
8+
bash \
9+
pandoc \
10+
wkhtmltopdf \
11+
texlive-xetex \
12+
texlive-fonts-recommended \
13+
texlive-plain-generic \
14+
&& apt-get clean \
15+
&& rm -rf /var/lib/apt/lists/*
16+
17+
# Copy requirements first for better caching
818
COPY requirements.txt .
19+
20+
# Install dependencies
921
RUN pip install --no-cache-dir -r requirements.txt
1022

23+
# Copy the rest of the application
1124
COPY . .
1225

13-
ENTRYPOINT ["python", "main.py"]
26+
# Create necessary directories with proper permissions
27+
RUN mkdir -p logs output && chmod -R 777 logs output
28+
29+
# Expose the Streamlit port
30+
EXPOSE 8501
31+
32+
# Set environment variables
33+
ENV PYTHONUNBUFFERED=1
34+
ENV PYTHONDONTWRITEBYTECODE=1
35+
ENV STREAMLIT_SERVER_PORT=8501
36+
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
37+
ENV STREAMLIT_SERVER_HEADLESS=true
38+
ENV LOG_DIR=/app/logs
39+
ENV CACHE_FILE=/app/llm_cache.json
40+
ENV CACHE_ENABLED=true
41+
ENV GIT_PYTHON_REFRESH=quiet
42+
ENV OUTPUT_DIR=/app/output
43+
44+
# Default command (can be overridden by docker-compose)
45+
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]

Tutorial-Codebase-Knowledge/app.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -443,4 +443,4 @@
443443
### Requirements
444444
- A public GitHub repository
445445
- Google Gemini API access (configured via environment variables)
446-
""")
446+
""")
Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
version: '3'
2+
3+
services:
4+
tutorial-generator:
5+
build:
6+
context: .
7+
dockerfile: Dockerfile
8+
ports:
9+
- "8501:8501"
10+
volumes:
11+
- ./output:/app/output
12+
- ./logs:/app/logs
13+
- ./llm_cache.json:/app/llm_cache.json
14+
env_file:
15+
- .env
16+
environment:
17+
- PYTHONUNBUFFERED=1
18+
- PYTHONDONTWRITEBYTECODE=1
19+
- STREAMLIT_SERVER_PORT=8501
20+
- STREAMLIT_SERVER_ADDRESS=0.0.0.0
21+
- STREAMLIT_SERVER_HEADLESS=true
22+
- LOG_DIR=/app/logs
23+
- CACHE_FILE=/app/llm_cache.json
24+
- CACHE_ENABLED=true
25+
- OUTPUT_DIR=/app/output
26+
restart: unless-stopped
27+
# Ensure the container has write permissions to the output directory
28+
user: "${UID:-1000}:${GID:-1000}"
29+
# Create output directory with proper permissions
30+
# mkdir -p /app/output &&
31+
# chmod -R 777 /app/output &&
32+
# mkdir -p /app/logs &&
33+
# chmod -R 777 /app/logs &&
34+
# touch /app/llm_cache.json &&
35+
# chmod 666 /app/llm_cache.json &&
36+
command: >
37+
bash -c "
38+
streamlit run app.py --server.port=8501 --server.address=0.0.0.0
39+
"

Tutorial-Codebase-Knowledge/requirements.txt

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,4 +5,8 @@ gitpython>=3.1.0
55
google-cloud-aiplatform>=1.25.0
66
google-genai>=1.9.0
77
python-dotenv>=1.0.0
8-
pathspec>=0.11.0
8+
streamlit>=1.32.0
9+
markdown>=3.4.0
10+
pdfkit>=1.0.0
11+
weasyprint>=59.0
12+
pymdown-extensions>=10.0.0
Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
import os
2+
from utils.markdown_converter import markdown_to_html, markdown_to_pdf, create_combined_markdown, get_file_contents
3+
4+
# Test directory
5+
output_dir = "output/GIM-BACK"
6+
7+
# If the directory doesn't exist, try to find it
8+
if not os.path.exists(output_dir):
9+
output_base_dir = "output"
10+
if os.path.exists(output_base_dir) and os.path.isdir(output_base_dir):
11+
project_dirs = [d for d in os.listdir(output_base_dir)
12+
if os.path.isdir(os.path.join(output_base_dir, d))]
13+
print(f"Available project directories: {project_dirs}")
14+
if project_dirs:
15+
output_dir = os.path.join(output_base_dir, project_dirs[0])
16+
print(f"Using first available directory: {output_dir}")
17+
18+
# Check if output directory exists
19+
if os.path.exists(output_dir) and os.path.isdir(output_dir):
20+
print(f"Output directory exists: {output_dir}")
21+
22+
# Get file contents
23+
file_contents = get_file_contents(output_dir, '.md')
24+
print(f"Found {len(file_contents)} markdown files")
25+
26+
# Create combined markdown
27+
combined_content, combined_file_path = create_combined_markdown(
28+
file_contents,
29+
os.path.join(output_dir, "test_combined.md")
30+
)
31+
32+
if combined_content and combined_file_path:
33+
print(f"Created combined markdown file: {combined_file_path}")
34+
35+
# Convert to HTML
36+
html_content = markdown_to_html(combined_content)
37+
if html_content:
38+
html_file_path = os.path.join(output_dir, "test_combined.html")
39+
with open(html_file_path, "w", encoding="utf-8") as f:
40+
f.write(html_content)
41+
print(f"Created HTML file: {html_file_path}")
42+
else:
43+
print("Failed to convert to HTML")
44+
45+
# Convert to PDF
46+
pdf_file_path = os.path.join(output_dir, "test_combined.pdf")
47+
pdf_path = markdown_to_pdf(combined_content, pdf_file_path)
48+
if pdf_path and os.path.exists(pdf_path):
49+
print(f"Created PDF file: {pdf_path}")
50+
else:
51+
print("Failed to convert to PDF")
52+
else:
53+
print("Failed to create combined markdown")
54+
else:
55+
print(f"Output directory does not exist: {output_dir}")
Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
import os
2+
3+
# Test directory
4+
output_dir = "output/GIM-BACK"
5+
6+
# If the directory doesn't exist, try to find it
7+
if not os.path.exists(output_dir):
8+
output_base_dir = "output"
9+
if os.path.exists(output_base_dir) and os.path.isdir(output_base_dir):
10+
project_dirs = [d for d in os.listdir(output_base_dir)
11+
if os.path.isdir(os.path.join(output_base_dir, d))]
12+
print(f"Available project directories: {project_dirs}")
13+
if project_dirs:
14+
output_dir = os.path.join(output_base_dir, project_dirs[0])
15+
print(f"Using first available directory: {output_dir}")
16+
17+
# Check if output directory exists
18+
if os.path.exists(output_dir) and os.path.isdir(output_dir):
19+
print(f"Output directory exists: {output_dir}")
20+
21+
# List files in the directory
22+
files = sorted(os.listdir(output_dir))
23+
print(f"Files in directory: {files}")
24+
25+
# Read and print the content of each file
26+
for file in files:
27+
file_path = os.path.join(output_dir, file)
28+
if os.path.isfile(file_path):
29+
try:
30+
with open(file_path, "r", encoding="utf-8") as f:
31+
content = f.read()
32+
print(f"\n--- {file} ---")
33+
print(f"First 100 characters: {content[:100]}...")
34+
except Exception as e:
35+
print(f"Error reading file {file}: {str(e)}")
36+
else:
37+
print(f"Output directory does not exist: {output_dir}")
Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
import os
2+
3+
# Test directory detection logic
4+
output_base_dir = "output"
5+
project_name = "GIM-BACK"
6+
7+
# Test with non-existent directory
8+
non_existent_dir = "output/NON-EXISTENT"
9+
print(f"\nTesting with non-existent directory: {non_existent_dir}")
10+
if os.path.exists(non_existent_dir) and os.path.isdir(non_existent_dir):
11+
print(f"Directory exists: {non_existent_dir}")
12+
else:
13+
print(f"Directory does not exist: {non_existent_dir}")
14+
15+
# Try to find it in the output base directory
16+
if os.path.exists(output_base_dir) and os.path.isdir(output_base_dir):
17+
project_dirs = [d for d in os.listdir(output_base_dir)
18+
if os.path.isdir(os.path.join(output_base_dir, d))]
19+
20+
print(f"Available project directories: {project_dirs}")
21+
else:
22+
print(f"Output base directory does not exist: {output_base_dir}")
23+
24+
print("\nTesting with existing directory:")
25+
26+
print(f"Checking if output directory exists: {output_base_dir}")
27+
if os.path.exists(output_base_dir) and os.path.isdir(output_base_dir):
28+
print(f"Output base directory exists: {output_base_dir}")
29+
30+
# List all directories in the output base directory
31+
project_dirs = [d for d in os.listdir(output_base_dir)
32+
if os.path.isdir(os.path.join(output_base_dir, d))]
33+
34+
print(f"Found project directories: {project_dirs}")
35+
36+
if project_name and project_name in project_dirs:
37+
# Found the project directory
38+
actual_output_dir = os.path.join(output_base_dir, project_name)
39+
print(f"Found project directory: {actual_output_dir}")
40+
41+
# List files in the project directory
42+
files = os.listdir(actual_output_dir)
43+
print(f"Files in project directory: {files}")
44+
else:
45+
print(f"Project directory '{project_name}' not found in {output_base_dir}")
46+
else:
47+
print(f"Output base directory does not exist: {output_base_dir}")

0 commit comments

Comments
 (0)