Skip to content

feat: Set up complete Python testing infrastructure with Poetry #28

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 69 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,71 @@
# IDE / editor files
.idea
.vscode
*.swp
*.swo
*~

# Virtual environments
# `venv` is deliberately written without a trailing slash: a bare name matches
# a file or symlink called venv as well as a directory.
venv
env/
ENV/
virtualenv/
.venv/

# Python
# Bare `__pycache__` (no trailing slash) covers the directory form too.
__pycache__
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Testing
.pytest_cache/
.coverage
htmlcov/
coverage.xml
*.cover
.hypothesis/
.tox/
.nox/

# Model files
# Bare `models` matches both a directory and a plain file of that name.
models
gpt2-pytorch_model.bin
*.ckpt
*.pt
*.pth

# Claude
.claude/*

# Jupyter
.ipynb_checkpoints/
*.ipynb_checkpoints

# OS files
.DS_Store
Thumbs.db

# Logs
*.log

# Package manager
# Note: Do NOT ignore poetry.lock or uv.lock
# (dist/ is already listed in the Python section above.)
*.whl
1,409 changes: 1,409 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
[tool.poetry]
name = "gpt2-pytorch"
version = "0.1.0"
description = "GPT-2 PyTorch implementation for text generation"
authors = ["Your Name <you@example.com>"]
readme = "README.md"
packages = [{include = "GPT2"}]

# Runtime requirements of the package.
# - torch: lower bound only, no upper cap.
# - regex: a bare version string, i.e. an exact pin to that release.
[tool.poetry.dependencies]
python = "^3.8"
torch = ">=0.4.1"
regex = "2017.4.5"
tqdm = "^4.66.0"

# Development-only group: the test runner plus its coverage and mocking plugins.
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
pytest-mock = "^3.11.0"

# Console-script aliases: `test` and `tests` both point at pytest's `main`.
[tool.poetry.scripts]
test = "pytest:main"
tests = "pytest:main"

# pytest configuration: where to collect from and which names count as tests.
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Options applied to every run:
# - verbose output with short tracebacks;
# - --strict-markers: only markers declared in `markers` below are accepted;
# - coverage measured for the GPT2 package, reported to the terminal
#   (missing lines, fully covered files skipped), as HTML and as XML;
# - the run fails when total coverage is below 80%.
addopts = [
"-v",
"--strict-markers",
"--tb=short",
"--cov=GPT2",
"--cov-report=term-missing:skip-covered",
"--cov-report=html",
"--cov-report=xml",
"--cov-fail-under=80"
]
# Marker declarations, each written as "name: description".
markers = [
"unit: Unit tests",
"integration: Integration tests",
"slow: Slow running tests"
]

# coverage.py measurement settings: measure the GPT2 package and leave out
# test code, bytecode caches, installed packages and virtualenv directories.
[tool.coverage.run]
source = ["GPT2"]
omit = [
"*/tests/*",
"*/__pycache__/*",
"*/site-packages/*",
"*/venv/*",
"*/virtualenv/*"
]

# Report settings. Each `exclude_lines` entry is a regular expression; lines
# matching one of them are left out of the coverage report.
[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"def __str__",
"raise AssertionError",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"class .*\\bProtocol\\):"
]
# Percentages are shown with two decimal places; missing lines are listed.
precision = 2
show_missing = true

# Output locations for the HTML and XML reports.
[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.xml]
output = "coverage.xml"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Empty file added tests/__init__.py
Empty file.
132 changes: 132 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""
Shared pytest fixtures and configuration for all tests.
"""
import os
import tempfile
from pathlib import Path
from typing import Generator, Dict, Any
import json
import shutil

import pytest


@pytest.fixture
def temp_dir() -> Generator[Path, None, None]:
    """Yield a fresh scratch directory as a ``Path`` and delete it on teardown."""
    scratch = tempfile.TemporaryDirectory()
    try:
        yield Path(scratch.name)
    finally:
        # Same effect as leaving a ``with`` block: remove the directory tree.
        scratch.cleanup()


@pytest.fixture
def mock_config() -> Dict[str, Any]:
    """Return a dictionary of stand-in GPT2 configuration values for tests.

    A new dictionary is built on every call, so a test may mutate the
    result without affecting other tests.
    """
    settings = dict(
        n_vocab=50257,
        n_ctx=1024,
        n_embd=768,
        n_head=12,
        n_layer=12,
        embd_pdrop=0.1,
        resid_pdrop=0.1,
        attn_pdrop=0.1,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        summary_type="cls_index",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
    )
    return settings


@pytest.fixture
def sample_text() -> str:
    """Return a short English sentence to use as text-generation input."""
    prompt = "The quick brown fox jumps over the lazy dog"
    return prompt


@pytest.fixture
def mock_model_files(temp_dir: Path) -> Dict[str, Path]:
    """Create a small set of fake model files under ``temp_dir / "model"``.

    Three files are written: ``encoder.json`` (token -> id mapping),
    ``vocab.bpe`` (a version header followed by three merge lines) and
    ``hparams.json`` (five hyperparameters).

    All files are opened with an explicit UTF-8 encoding. ``vocab.bpe``
    contains the non-ASCII character "Ġ"; without an explicit encoding,
    ``open`` falls back to the platform's locale encoding, which cannot
    represent that character everywhere and would raise
    ``UnicodeEncodeError``.

    Args:
        temp_dir: Directory in which the ``model`` sub-directory is created.

    Returns:
        A dict with the keys ``"model_dir"``, ``"encoder"``, ``"vocab_bpe"``
        and ``"hparams"``, each mapped to the corresponding path.
    """
    model_dir = temp_dir / "model"
    model_dir.mkdir()

    # Create mock encoder.json
    encoder_data = {
        "the": 262,
        "quick": 2068,
        "brown": 7586,
        "fox": 21831,
        "jumps": 18045,
        "over": 625,
        "lazy": 16931,
        "dog": 3290,
        "<|endoftext|>": 50256
    }
    encoder_path = model_dir / "encoder.json"
    with open(encoder_path, "w", encoding="utf-8") as f:
        json.dump(encoder_data, f)

    # Create mock vocab.bpe ("Ġ" is why the encoding must be pinned).
    vocab_bpe_path = model_dir / "vocab.bpe"
    with open(vocab_bpe_path, "w", encoding="utf-8") as f:
        f.write("#version: 0.2\n")
        f.write("Ġ t\n")
        f.write("Ġ a\n")
        f.write("h e\n")

    # Create mock hparams.json
    hparams_path = model_dir / "hparams.json"
    with open(hparams_path, "w", encoding="utf-8") as f:
        json.dump({
            "n_vocab": 50257,
            "n_ctx": 1024,
            "n_embd": 768,
            "n_head": 12,
            "n_layer": 12
        }, f)

    return {
        "model_dir": model_dir,
        "encoder": encoder_path,
        "vocab_bpe": vocab_bpe_path,
        "hparams": hparams_path
    }


@pytest.fixture
def mock_checkpoint_path(temp_dir: Path) -> Path:
    """Return the path of an empty ``checkpoint/model.ckpt`` file inside ``temp_dir``."""
    ckpt_file = temp_dir / "checkpoint" / "model.ckpt"
    ckpt_file.parent.mkdir()
    # A zero-byte file is enough to stand in for a checkpoint on disk.
    ckpt_file.touch()
    return ckpt_file


@pytest.fixture(autouse=True)
def cleanup_cache():
    """Remove cache directories once each test has finished.

    The fixture is ``autouse``, so it wraps every test. The directory names
    are relative, so they are resolved against the current working directory
    at teardown time. Errors raised while deleting are ignored.
    """
    yield
    for leftover in (".pytest_cache", "__pycache__"):
        if not os.path.exists(leftover):
            continue
        shutil.rmtree(leftover, ignore_errors=True)


@pytest.fixture
def capture_stdout(monkeypatch):
    """Redirect ``sys.stdout`` into an in-memory buffer for the test.

    Returns:
        A zero-argument callable that returns everything written to stdout
        so far. The replacement of ``sys.stdout`` is made through
        ``monkeypatch``.
    """
    import io
    import sys

    sink = io.StringIO()
    monkeypatch.setattr(sys, "stdout", sink)

    # The buffer's own bound method is the reader handed back to the test.
    return sink.getvalue
Empty file added tests/integration/__init__.py
Empty file.
106 changes: 106 additions & 0 deletions tests/test_setup_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""
Validation tests to ensure the testing infrastructure is properly set up.
"""
import pytest
import sys
from pathlib import Path


class TestSetupValidation:
    """Smoke tests for the test infrastructure itself.

    They check that the test tooling imports, that the expected directory
    layout exists, that the shared fixtures work, that the GPT2 modules can
    be imported, and that the custom markers can be used.
    """

    @pytest.mark.unit
    def test_pytest_is_installed(self):
        """Verify pytest is installed and importable."""
        import pytest
        assert pytest.__version__

    @pytest.mark.unit
    def test_coverage_is_installed(self):
        """Verify pytest-cov is installed."""
        import pytest_cov
        assert pytest_cov

    @pytest.mark.unit
    def test_mock_is_installed(self):
        """Verify pytest-mock is installed."""
        import pytest_mock
        assert pytest_mock

    @pytest.mark.unit
    def test_project_structure_exists(self):
        """Verify the project structure is set up correctly."""
        # This file lives in <root>/tests/, so two levels up is the root.
        project_root = Path(__file__).parent.parent

        # Check main directories exist
        assert (project_root / "GPT2").exists()
        assert (project_root / "tests").exists()
        assert (project_root / "tests" / "unit").exists()
        assert (project_root / "tests" / "integration").exists()

        # Check __init__.py files exist
        assert (project_root / "tests" / "__init__.py").exists()
        assert (project_root / "tests" / "unit" / "__init__.py").exists()
        assert (project_root / "tests" / "integration" / "__init__.py").exists()

    @pytest.mark.unit
    def test_conftest_fixtures(self, temp_dir, mock_config, sample_text):
        """Test that conftest fixtures are working properly."""
        # Test temp_dir fixture
        assert temp_dir.exists()
        assert temp_dir.is_dir()

        # Test mock_config fixture
        assert isinstance(mock_config, dict)
        assert "n_vocab" in mock_config
        assert mock_config["n_vocab"] == 50257

        # Test sample_text fixture
        assert isinstance(sample_text, str)
        assert len(sample_text) > 0

    @pytest.mark.unit
    def test_mock_model_files_fixture(self, mock_model_files):
        """Test the mock_model_files fixture creates necessary files."""
        assert mock_model_files["model_dir"].exists()
        assert mock_model_files["encoder"].exists()
        assert mock_model_files["vocab_bpe"].exists()
        assert mock_model_files["hparams"].exists()

    @pytest.mark.unit
    def test_gpt2_module_importable(self):
        """Test that GPT2 module can be imported."""
        # Add parent directory to path if not already there.
        # NOTE: the entry is not removed afterwards, so it stays on sys.path
        # for the remainder of the process.
        project_root = Path(__file__).parent.parent
        if str(project_root) not in sys.path:
            sys.path.insert(0, str(project_root))

        # Try importing GPT2 modules
        import GPT2.config
        import GPT2.encoder
        import GPT2.model
        import GPT2.utils
        import GPT2.sample

        assert GPT2.config
        assert GPT2.encoder
        assert GPT2.model
        assert GPT2.utils
        assert GPT2.sample

    @pytest.mark.integration
    def test_pytest_markers_configured(self):
        """Test that custom pytest markers are properly configured."""
        # This test itself uses the integration marker
        # If it runs, the marker is configured correctly
        assert True

    @pytest.mark.slow
    def test_slow_marker_configured(self):
        """Test that the slow marker is configured."""
        # This test uses the slow marker
        import time

        requested = 0.1
        # perf_counter() cannot go backwards, unlike the wall clock returned
        # by time.time(), which may be adjusted during the sleep.
        started = time.perf_counter()
        time.sleep(requested)  # Simulate slow test
        elapsed = time.perf_counter() - started
        # Allow a small tolerance: sleep and the measuring clock need not
        # agree to the last microsecond.
        assert elapsed >= requested * 0.9
Empty file added tests/unit/__init__.py
Empty file.