 Fixtures for the testing of Mkdocs-Macros (pytest)

 This program must be in the test directory.

+These are the two classes:
+
+- DocProject
+- TestMarkdownPage
+
+
 (C) Laurent Franceschetti 2024
 """

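For orientation, here is a minimal sketch of how these two classes are meant to be used from a test module. It is not part of the commit: the constructor argument and the build() call are assumptions; only pages, get_page() and get_plugin() appear further down in this diff.

    # Hypothetical usage sketch (names marked as assumptions are not in this diff)
    from fixtures import DocProject           # assumption: this module imported as `fixtures`

    def test_doc_project():
        project = DocProject('simple_project')   # assumption: path of a test MkDocs project
        project.build()                          # assumption: runs `mkdocs build` on it
        page = project.get_page('index')         # shown below: full or partial name accepted
        assert page is not None                  # page is a TestMarkdownPage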
 import os
-from io import StringIO
 import yaml
 import subprocess
 import re
 from dataclasses import dataclass, field
 from typing import List
 import json
 from typing import Any, List
-import inspect
+


 # from rich import print
-import markdown
 from bs4 import BeautifulSoup
-import pandas as pd
-import rich
-from rich.table import Table


 "A dictionary where the keys are also accessible with the dot notation"
 from mkdocs_macros.util import SuperDict
+from .fixture_util import (get_frontmatter, markdown_to_html, get_first_h1,
+                           find_in_html, find_after, list_markdown_files, find_page,
+                           run_command)

 # ---------------------------
 # Initialization
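Among the helpers now imported from .fixture_util, find_page() is the only one whose definition does not appear in this diff; its contract can only be inferred from the refactored get_page() further down (it maps a full or partial page name onto one of the known filenames). A sketch of that assumed behavior:

    # Assumed behavior of find_page(), inferred from get_page() below (not from its source):
    filenames = ['index.md', 'subdir/other_page.md']    # illustrative values
    find_page('index.md', filenames)      # -> 'index.md' (exact match)
    find_page('other_page', filenames)    # -> 'subdir/other_page.md' (no extension, partial path)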
@@ -63,269 +67,6 @@ def list_doc_projects(directory:str):
 "The error string"
 MACRO_ERROR_STRING = '# _Macro Rendering Error_'

-
-# ---------------------------
-# Print functions
-# ---------------------------
-std_print = print
-from rich import print
-from rich.panel import Panel
-
-TITLE_COLOR = 'green'
-def h1(s:str, color:str=TITLE_COLOR):
-    "Color print a 1st level title to the console"
-    print()
-    print(Panel(f"[{color} bold]{s}", style=color, width=80))
-
-def h2(s:str, color:str=TITLE_COLOR):
-    "Color print a 2nd level title to the consule"
-    print()
-    print(f"[green bold underline]{s}")
-
-def h3(s:str, color:str=TITLE_COLOR):
-    "Color print a 2nd level title to the consule"
-    print()
-    print(f"[green underline]{s}")
-
-# ---------------------------
-# Low-level functions
-# ---------------------------
-
-def find_after(s:str, word:str, pattern:str):
-    """
-    Find the the first occurence of a pattern after a word
-    (Both word and pattern can be regex, and the matching
-    is case insensitive.)
-    """
-    word_pattern = re.compile(word, re.IGNORECASE)
-    parts = word_pattern.split(s, maxsplit=1)
-    # parts = s.split(word, 1)
-
-    if len(parts) > 1:
-        # Strip the remainder and search for the pattern
-        remainder = parts[1].strip()
-        match = re.search(pattern, remainder, flags=re.IGNORECASE)
-        return match.group(0) if match else None
-    else:
-        return None
-
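For reference, find_after() discards everything up to the first case-insensitive match of word and returns the first match of pattern in what remains, or None. An illustrative call, with made-up values:

    line = "INFO - Documentation built in 0.87 seconds"   # made-up log line
    find_after(line, "built in", r"[\d.]+")    # -> '0.87'
    find_after(line, "warning", r"\d+")        # -> None (the word does not occur)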
-def list_markdown_files(directory:str):
-    """
-    Makes a list of markdown files in a directory
-    """
-    markdown_files = []
-    for root, dirs, files in os.walk(directory):
-        for file in files:
-            if file.endswith('.md') or file.endswith('.markdown'):
-                relative_path = os.path.relpath(os.path.join(root, file), directory)
-                markdown_files.append(relative_path)
-    return markdown_files
-
-
-def markdown_to_html(markdown_text):
-    """Convert markdown text to HTML."""
-    html = markdown.markdown(markdown_text, extensions=["tables"])
-    # print("HTML:")
-    # print(html)
-    return html
-
-
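These two helpers respectively list the Markdown sources under a docs tree and render Markdown with the "tables" extension enabled. A quick illustration (the directory name is made up):

    list_markdown_files('test/simple_project/docs')   # e.g. ['index.md', 'sub/page.md']
    html = markdown_to_html("# Title\n\nSome **bold** text")
    assert '<h1>Title</h1>' in html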
-def style_dataframe(df:pd.DataFrame):
-    """
-    Apply beautiful and colorful styling to any dataframe
-    (patches the dataframe).
-    """
-    def _rich_str(self):
-        table = Table(show_header=True, header_style="bold magenta")
-
-        # Add columns
-        for col in self.columns:
-            table.add_column(col, style="dim", width=12)
-
-        # Add rows
-        for row in self.itertuples(index=False):
-            table.add_row(*map(str, row))
-
-        return table
-
-    # reassign str to rich (to avoid messing up when rich.print is used)
-    df.__rich__ = _rich_str.__get__(df)
-
-def extract_tables_from_html(html:str, formatter:callable=None):
-    """
-    Extract tables from a HTML source and convert them into dataframes
-    """
-    soup = BeautifulSoup(html, 'html.parser')
-    tables = soup.find_all('table')
-
-    dataframes = {}
-    unnamed_table_count = 0
-    for table in tables:
-        print("Found a table")
-        # Find the nearest header
-        header = table.find_previous(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
-        if header:
-            header_text = header.get_text()
-        else:
-            unnamed_table_count += 1
-            header_text = f"Unnamed Table {unnamed_table_count}"
-
-        # Convert HTML table to DataFrame
-        df = pd.read_html(StringIO(str(table)))[0]
-        if formatter:
-            formatter(df)
-        # Add DataFrame to dictionary with header as key
-        dataframes[header_text] = df
-
-    return dataframes
-
-
-def get_frontmatter(text:str) -> tuple[str, dict]:
-    "Get the front matter from a markdown file"
-    # Split the content to extract the YAML front matter
-    parts = text.split('---',maxsplit=2)
-    if len(parts) > 1:
-        frontmatter = parts[1]
-        metadata = SuperDict(yaml.safe_load(frontmatter))
-        try:
-            markdown = parts[2]
-        except IndexError:
-            markdown = ''
-        return (markdown.strip(), frontmatter, metadata)
-    else:
-        return (text, '', {})
-
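Note that, despite its tuple[str, dict] annotation, get_frontmatter() returns a 3-tuple: the Markdown body, the raw front matter string, and the parsed metadata wrapped in a SuperDict. An illustrative call:

    text = "---\ntitle: Home\nfoo: 5\n---\n# Welcome"
    body, raw_frontmatter, meta = get_frontmatter(text)
    assert body == '# Welcome'
    assert meta.title == 'Home'     # SuperDict: keys also reachable with dot notation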
-def find_in_html(html: str,
-                 pattern: str,
-                 header: str = None, header_level: int = None) -> str | None:
-    """
-    Find a text or regex pattern in a HTML document (case-insensitive)
-
-    Arguments
-    ---------
-    - html: the html string
-    - pattern: the text or regex
-    - header (text or regex): if specified, it finds it first,
-      and then looks for the text between that header and the next one
-      (any level).
-    - header_level: you can speciy it, if there is a risk of ambiguity.
-
-    Returns
-    -------
-    The line where the pattern was found, or None
-    """
-    if not isinstance(pattern, str):
-        pattern = str(pattern)
-
-    soup = BeautifulSoup(html, 'html.parser')
-
-    # Compile regex patterns with case-insensitive flag
-    pattern_regex = re.compile(pattern, re.IGNORECASE)
-
-    if header:
-        header_regex = re.compile(header, re.IGNORECASE)
-
-        # Find all headers (h1 to h6)
-        headers = soup.find_all(re.compile('^h[1-6]$', re.IGNORECASE))
-
-        for hdr in headers:
-            if header_regex.search(hdr.text):
-                # Check if header level is specified and matches
-                if header_level and hdr.name != f'h{header_level}':
-                    continue
-
-                # Extract text until the next header
-                text = []
-                for sibling in hdr.find_next_siblings():
-                    if sibling.name and re.match('^h[1-6]$', sibling.name, re.IGNORECASE):
-                        break
-                    text.append(sibling.get_text(separator='\n', strip=True))
-
-                full_text = '\n'.join(text)
-
-                # Search for the pattern in the extracted text
-                match = pattern_regex.search(full_text)
-                if match:
-                    # Find the full line containing the match
-                    lines = full_text.split('\n')
-                    for line in lines:
-                        if pattern_regex.search(line):
-                            return line
-    else:
-        # Extract all text from the document
-        full_text = soup.get_text(separator='\n', strip=True)
-
-        # Search for the pattern in the full text
-        match = pattern_regex.search(full_text)
-        if match:
-            # Find the full line containing the match
-            lines = full_text.split('\n')
-            for line in lines:
-                if pattern_regex.search(line):
-                    return line
-
-    return None
-
-
-
-
-
-
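find_in_html() returns the whole line of rendered text that contains the pattern, optionally restricted to the section under a given header. An illustrative round trip through markdown_to_html(), with made-up content:

    html = markdown_to_html("# Results\n\nThe macro returned 42 apples.\n\n# Notes\n\nNothing here.")
    find_in_html(html, r"\d+ apples")             # -> 'The macro returned 42 apples.'
    find_in_html(html, "apples", header="Notes")  # -> None (not in that section)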
-def get_first_h1(markdown_text: str):
-    """
-    Get the first h1 in a markdown file,
-    ignoring YAML frontmatter and comments.
-    """
-    # Remove YAML frontmatter
-    yaml_frontmatter_pattern = re.compile(r'^---\s*\n(.*?\n)?---\s*\n',
-                                          re.DOTALL)
-    markdown_text = yaml_frontmatter_pattern.sub('', markdown_text)
-    # Regular expression to match both syntaxes for level 1 headers
-    h1_pattern = re.compile(r'^(# .+|.+\n=+)', re.MULTILINE)
-    match = h1_pattern.search(markdown_text)
-    if match:
-        header = match.group(0)
-        # Remove formatting
-        if header.startswith('#'):
-            return header.lstrip('# ').strip()
-        else:
-            return header.split('\n')[0].strip()
-    return None
-
-
-
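get_first_h1() recognizes both level-1 header syntaxes after stripping any YAML front matter. Two illustrative calls:

    get_first_h1("---\ntitle: x\n---\n# Welcome\n\nText")   # -> 'Welcome'
    get_first_h1("Welcome\n=======\n\nText")                # -> 'Welcome' (underline syntax)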
-def get_tables(markdown_text:str) -> dict[pd.DataFrame]:
-    """
-    Convert markdown text to HTML, extract tables,
-    and convert them to dataframes.
-    """
-    html = markdown_to_html(markdown_text)
-    dataframes = extract_tables_from_html(html,
-                                          formatter=style_dataframe)
-    return dataframes
-
-
-
-# ---------------------------
-# OS Functions
-# ---------------------------
-def run_command(command, *args) -> subprocess.CompletedProcess:
-    "Execute a command"
-    full_command = [command] + list(args)
-    return subprocess.run(full_command, capture_output=True, text=True)
-
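run_command() is a thin wrapper around subprocess.run() that captures stdout/stderr as text; in these fixtures it is typically used to invoke mkdocs itself. An illustrative call (the arguments are just an example):

    result = run_command('mkdocs', 'build', '-f', 'mkdocs.yml')   # example arguments
    if result.returncode != 0:
        print(result.stderr)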
-def get_caller_directory():
-    "Get the caller's directory name (to be called from a function)"
-    # Get the current frame
-    current_frame = inspect.currentframe()
-    # Get the caller's frame
-    caller_frame = inspect.getouterframes(current_frame, 2)
-    # Get the file name of the caller
-    caller_file = caller_frame[1].filename
-    # Get the absolute path of the directory containing the caller file
-    directory_abspath = os.path.abspath(os.path.dirname(caller_file))
-    return directory_abspath
-
 # ---------------------------
 # Log parsing
 # ---------------------------
@@ -824,24 +565,19 @@ def pages(self) -> List[TestMarkdownPage]:
         return self._pages

     def get_page(self, name:str):
-        "Get the page by its filename or a substring"
-        print("SEARCHING:", name)
-        for page in self.pages:
-            # give priority to exact matches
-            if name == page.filename:
-                return page
-            # try without extension
-            stem, _ = os.path.splitext(page.filename)
-            if name == stem:
-                return page
-        # try again without full path
+        """
+        Find a name in the list of Markdown pages (filenames)
+        using a name (full or partial, with or without extension).
+        """
+        # get all the filenames of pages:
+        filenames = [page.filename for page in self.pages]
+        # get the filename we want, from that list:
+        filename = find_page(name, filenames)
+        # return the corresponding page:
         for page in self.pages:
-            if page.filename.endswith(name):
+            if page.filename == filename:
                 return page
-            stem, _ = os.path.splitext(page.filename)
-            if stem.endswith(name):
-                return page
-        print("- NOT FOUND")
+

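After this refactoring, name resolution is delegated to find_page(); callers are unchanged and, per the new docstring, may pass a full or partial name, with or without extension (the filename below is made up):

    project.get_page('index.md')    # exact filename
    project.get_page('index')       # same page, extension omitted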
     def get_plugin(self, name:str) -> SuperDict:
         "Get the plugin by its plugin name"