run legacy doc/python examples in one batch and inline interactive Plotly HTML

gvwilson · gvwilson · commit 8e7ecf802b40 · 2025-07-25T10:59:19.000-04:00
diff --git a/Makefile b/Makefile
@@ -4,7 +4,6 @@ RUN = uv run
 PACKAGE_DIRS = _plotly_utils plotly
 CODE_DIRS = ${PACKAGE_DIRS} scripts
 EXAMPLE_SRC = $(wildcard doc/python/*.md)
-EXAMPLE_DST = $(patsubst doc/python/%.md,pages/examples/%.md,${EXAMPLE_SRC})
 
 ## commands: show available commands
 commands:
@@ -23,11 +22,9 @@ docs-lint:
 docs-tmp:
 	MKDOCS_TEMP_DIR=./docs_tmp ${RUN} mkdocs build
 
-## examples: temporary target to copy and run doc/python
-examples: ${EXAMPLE_DST}
-
-pages/examples/%.md: doc/python/%.md
-	${RUN} bin/run_markdown.py --output $@ $<
+## examples: generate Markdown from doc/python
+examples:
+	${RUN} bin/run_markdown.py --outdir pages/examples --inline --verbose ${EXAMPLE_SRC}
 
 ## format: reformat code
 format:
diff --git a/bin/run_markdown.py b/bin/run_markdown.py
@@ -8,131 +8,89 @@
 from contextlib import redirect_stdout, redirect_stderr
 import io
 from pathlib import Path
+import plotly.graph_objects as go
 import sys
 import traceback
 
 
-def parse_markdown(content):
-    """Parse markdown content and extract Python code blocks."""
-    lines = content.split("\n")
-    blocks = []
-    current_block = None
-    in_code_block = False
-
-    for i, line in enumerate(lines):
-        # Start of Python code block
-        if line.strip().startswith("```python"):
-            in_code_block = True
-            current_block = {
-                "start_line": i,
-                "end_line": None,
-                "code": [],
-                "type": "python",
-            }
-
-        # End of code block
-        elif line.strip() == "```" and in_code_block:
-            in_code_block = False
-            current_block["end_line"] = i
-            current_block["code"] = "\n".join(current_block["code"])
-            blocks.append(current_block)
-            current_block = None
-
-        # Line inside code block
-        elif in_code_block:
-            current_block["code"].append(line)
+def main():
+    args = _parse_args()
+    for filename in args.input:
+        _do_file(args, Path(filename))
 
-    return blocks
 
+def _do_file(args, input_file):
+    """Process a single file."""
 
-def execute_python_code(code, output_dir, output_figure_stem):
-    """Execute Python code and capture output and generated files."""
-    # Capture stdout and stderr
-    stdout_buffer = io.StringIO()
-    stderr_buffer = io.StringIO()
+    # Validate input file
+    if not input_file.exists():
+        print(f"Error: '{input_file}' not found", file=sys.stderr)
+        sys.exit(1)
 
-    # Track files created during execution
-    output_path = Path(output_dir)
-    if not output_path.exists():
-        output_path.mkdir(parents=True, exist_ok=True)
+    # Determine output file path etc.
+    stem = input_file.stem
+    output_file = args.outdir / f"{input_file.stem}{input_file.suffix}"
+    if input_file.resolve() == output_file.resolve():
+        print(f"Error: output would overwrite input '{input_file}'", file=sys.stderr)
+        sys.exit(1)
 
-    files_before = set(f.name for f in output_path.iterdir())
-    result = {"stdout": "", "stderr": "", "error": None, "images": [], "html_files": []}
-    figures = []
+    # Read input
     try:
-        # Create a custom show function to capture plotly figures
-        def capture_plotly_show(fig):
-            """Custom show function that saves plotly figures instead of displaying them."""
-            nonlocal figures
-            figures.append(fig)
-            png_filename = (
-                f"{output_figure_stem}_{len(figures)}.png"
-            )
-            png_path = Path(output_dir) / png_filename
-            fig.write_image(png_path, width=800, height=600)
-            result["images"].append(png_filename)
-            print(f"Plotly figure saved as PNG: {png_filename}")
-            return
-
-        # Create a namespace for code execution
-        exec_globals = {
-            "__name__": "__main__",
-            "__file__": "<markdown_code>",
-        }
-
-        # Monkey patch plotly show method to capture figures
-        original_show = None
-
-        # Execute the code with output capture
-        with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
-            # Try to import plotly and patch the show method
-            def patched_show(self, *args, **kwargs):
-                capture_plotly_show(self)
-            import plotly.graph_objects as go
-            original_show = go.Figure.show
-            go.Figure.show = patched_show
+        with open(input_file, "r", encoding="utf-8") as f:
+            content = f.read()
+    except Exception as e:
+        print(f"Error reading input file: {e}", file=sys.stderr)
+        sys.exit(1)
 
-            # Execute the code
-            exec(code, exec_globals)
+    # Parse markdown and extract code blocks
+    _report(args.verbose, f"Processing {input_file}...")
+    code_blocks = _parse_md(content)
+    _report(args.verbose, f"- Found {len(code_blocks)} code blocks")
 
-            # Try to find and handle any plotly figures that were created and not already processed
-            for name, obj in exec_globals.items():
-                if (
-                    hasattr(obj, "__class__")
-                    and "plotly" in str(type(obj)).lower()
-                    and hasattr(obj, "show")
-                ):
-                    # This looks like a plotly figure that wasn't already processed by show()
-                    if obj not in figures:
-                        print("NOT ALREADY PROCESSED", obj, file=sys.stderr)
-                        capture_plotly_show(obj)
-
-        # Restore original show method if we patched it
-        if original_show:
-            import plotly.graph_objects as go
-            go.Figure.show = original_show
+    # Execute code blocks and collect results
+    execution_results = []
+    figure_counter = 0
+    for i, block in enumerate(code_blocks):
+        _report(args.verbose, f"- Executing block {i + 1}/{len(code_blocks)}")
+        figure_counter, result = _run_code(block["code"], args.outdir, stem, figure_counter)
+        execution_results.append(result)
+        _report(result["error"], f"  - Warning: block {i + 1} had an error")
+        _report(result["images"], f"  - Generated {len(result['images'])} image(s)")
 
+    # Generate and save output
+    content = _generate_markdown(args, content, code_blocks, execution_results, args.outdir)
+    try:
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(content)
+        _report(args.verbose, f"- Output written to {output_file}")
+        _report(any(result["images"] for result in execution_results), f"- Images saved to {args.outdir}")
     except Exception as e:
-        result["error"] = f"Error executing code: {str(e)}\n{traceback.format_exc()}"
+        print(f"Error writing output file: {e}", file=sys.stderr)
+        sys.exit(1)
 
-    result["stdout"] = stdout_buffer.getvalue()
-    result["stderr"] = stderr_buffer.getvalue()
 
-    # Check for any additional files created
-    output_path = Path(output_dir)
-    if output_path.exists():
-        files_after = set(f.name for f in output_path.iterdir())
-        for f in (files_after - files_before):
-            if f not in result["images"] and file.lower().endswith(".png"):
-                result["images"].append(f)
+def _capture_plotly_show(fig, counter, result, output_dir, stem):
+    """Save fig as PNG and HTML under output_dir and record both in result."""
+    # Do not print here: fig.show() runs while stdout is redirected into the block's captured output.
+
-    return result
+    # Save PNG
+    png_filename = f"{stem}_{counter}.png"
+    png_path = output_dir / png_filename
+    fig.write_image(png_path, width=800, height=600)
+    result["images"].append(png_filename)
 
+    # Save HTML and get the content for embedding
+    html_filename = f"{stem}_{counter}.html"
+    html_path = output_dir / html_filename
+    fig.write_html(html_path, include_plotlyjs="cdn")
+    html_content = fig.to_html(include_plotlyjs="cdn", div_id=f"plotly-div-{counter}", full_html=False)
+    result["html_files"].append(html_filename)
+    result.setdefault("html_content", []).append(html_content)
 
-def generate_output_markdown(content, code_blocks, execution_results, output_dir):
+
+def _generate_markdown(args, content, code_blocks, execution_results, output_dir):
     """Generate the output markdown with embedded results."""
     lines = content.split("\n")
-    output_lines = []
 
     # Sort code blocks by start line in reverse order for safe insertion
     sorted_blocks = sorted(
@@ -173,10 +131,13 @@ def generate_output_markdown(content, code_blocks, execution_results, output_dir
             insert_lines.append("")
             insert_lines.append(f"![Generated Plot](./{image})")
 
-        # Add HTML files (for plotly figures)
-        for html_file in result.get("html_files", []):
-            insert_lines.append("")
-            insert_lines.append(f"[Interactive Plot](./{html_file})")
+        # Embed HTML content for plotly figures
+        if args.inline:
+            for html_content in result.get("html_content", []):
+                insert_lines.append("")
+                insert_lines.append("**Interactive Plot:**")
+                insert_lines.append("")
+                insert_lines.extend(html_content.split("\n"))
 
         # Insert the results after the code block
         if insert_lines:
@@ -187,75 +148,100 @@ def generate_output_markdown(content, code_blocks, execution_results, output_dir
     return "\n".join(lines)
 
 
-def main():
-    parser = argparse.ArgumentParser(
-        description="Process Markdown files with Python code blocks and generate output with results"
-    )
-    parser.add_argument("input_file", help="Input Markdown file")
-    parser.add_argument(
-        "-o", "--output", help="Output Markdown file (default: input_output.md)"
-    )
-    args = parser.parse_args()
+def _parse_args():
+    """Parse command-line arguments; --outdir is mandatory because output paths are built from it."""
+    parser = argparse.ArgumentParser(description="Process Markdown files with code blocks")
+    parser.add_argument("input", nargs="+", help="Input .md file")
+    parser.add_argument("--inline", action="store_true", help="Inline HTML in .md")
+    parser.add_argument("--outdir", type=Path, required=True, help="Output directory")
+    parser.add_argument("--verbose", action="store_true", help="Report progress")
+    return parser.parse_args()
 
-    # Validate input file
-    if not Path(args.input_file).exists():
-        print(f"Error: Input file '{args.input_file}' not found", file=sys.stderr)
-        sys.exit(1)
 
-    # Determine output file path
-    if args.output:
-        output_file = args.output
-    else:
-        input_path = Path(args.input_file)
-        output_file = str(
-            input_path.parent / f"{input_path.stem}_output{input_path.suffix}"
-        )
+def _parse_md(content):
+    """Extract fenced Python code blocks as dicts holding their line span and code text."""
+    lines = content.split("\n")
+    blocks = []
+    current_block = None
+    in_code_block = False
 
-    # Determine output directory for images
-    output_dir = str(Path(output_file).parent)
+    for i, line in enumerate(lines):
+        # Opening fence: stripped line starts with ```python
+        if line.strip().startswith("```python"):
+            in_code_block = True
+            current_block = {
+                "start_line": i,
+                "end_line": None,
+                "code": [],
+                "type": "python",
+            }
 
-    # Read input file
-    try:
-        with open(args.input_file, "r", encoding="utf-8") as f:
-            content = f.read()
-    except Exception as e:
-        print(f"Error reading input file: {e}", file=sys.stderr)
-        sys.exit(1)
+        # Closing fence: join the collected lines and record the finished block
+        elif line.strip() == "```" and in_code_block:
+            in_code_block = False
+            current_block["end_line"] = i
+            current_block["code"] = "\n".join(current_block["code"])
+            blocks.append(current_block)
+            current_block = None
 
-    print(f"Processing {args.input_file}...")
-    output_figure_stem = Path(output_file).stem
+        # Any other line while a fence is open belongs to the block's code
+        elif in_code_block:
+            current_block["code"].append(line)
 
-    # Parse markdown and extract code blocks
-    code_blocks = parse_markdown(content)
-    print(f"Found {len(code_blocks)} Python code blocks")
+    return blocks
 
-    # Execute code blocks and collect results
-    execution_results = []
-    for i, block in enumerate(code_blocks):
-        print(f"Executing code block {i + 1}/{len(code_blocks)}...")
-        result = execute_python_code(block["code"], output_dir, output_figure_stem)
-        execution_results.append(result)
 
-        if result["error"]:
-            print(f"  Warning: Code block {i + 1} had an error")
-        if result["images"]:
-            print(f"  Generated {len(result['images'])} image(s)")
+def _report(condition, message):
+    """Print message to stderr when condition is truthy."""
+    if condition:
+        print(message, file=sys.stderr)
 
-    # Generate output markdown
-    output_content = generate_output_markdown(
-        content, code_blocks, execution_results, output_dir
-    )
 
-    # Write output file
+def _run_code(code, output_dir, stem, figure_counter):
+    """Run code with Figure.show patched; return (figure_counter, result)."""
+    # Capture stdout and stderr
+    stdout_buffer = io.StringIO()
+    stderr_buffer = io.StringIO()
+
+    # Track files created during execution
+    if not output_dir.exists():
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+    files_before = set(f.name for f in output_dir.iterdir())
+    result = {"stdout": "", "stderr": "", "error": None, "images": [], "html_files": []}
     try:
-        with open(output_file, "w", encoding="utf-8") as f:
-            f.write(output_content)
-        print(f"Output written to {output_file}")
-        if any(result["images"] for result in execution_results):
-            print(f"Images saved to {output_dir}")
+        # Create a namespace for code execution
+        exec_globals = {
+            "__name__": "__main__",
+            "__file__": "<markdown_code>",
+        }
+
+        # Execute the code with output capture
+        with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
+            # Route Figure.show() to the capture helper while the block runs
+            def patched_show(self, *args, **kwargs):
+                nonlocal figure_counter
+                figure_counter += 1
+                _capture_plotly_show(self, figure_counter, result, output_dir, stem)
+            original_show = go.Figure.show
+            go.Figure.show = patched_show
+            try:
+                exec(code, exec_globals)
+            finally:
+                go.Figure.show = original_show
     except Exception as e:
-        print(f"Error writing output file: {e}", file=sys.stderr)
-        sys.exit(1)
+        result["error"] = f"Error executing code: {str(e)}\n{traceback.format_exc()}"
+
+    result["stdout"] = stdout_buffer.getvalue()
+    result["stderr"] = stderr_buffer.getvalue()
+
+    # Check for any additional files created
+    files_after = set(f.name for f in output_dir.iterdir())
+    for f in (files_after - files_before):
+        if f not in result["images"] and f.lower().endswith(".png"):
+            result["images"].append(f)
+
+    return figure_counter, result
 
 
 if __name__ == "__main__":