Add script directory to sys.path so imports work when scripts are called from remote_publish.sh. Also remove unused variable and no-op regex.
#!/usr/bin/env python3
"""
Convert Hugo markdown posts to gopher-formatted text files.

Usage:
    python scripts/gopher/convert_to_gopher.py content/posts/blog-posting.md
    python scripts/gopher/convert_to_gopher.py --all
    python scripts/gopher/convert_to_gopher.py --all --output gopher_build/blog/
"""

import argparse
import re
import sys
import textwrap
from pathlib import Path

import yaml

# Add script directory to path so the ascii_art import below resolves even
# when this script is invoked from elsewhere (e.g. remote_publish.sh)
sys.path.insert(0, str(Path(__file__).parent))

from ascii_art import (
    LINE_WIDTH,
    POST_HEADER,
    POST_FOOTER,
    SERIES_TO_DIR,
    get_post_meta_header,
    get_title_block,
    get_section_header,
    get_subheading,
    generate_movie_table,
    format_links_section,
)

# Paths
SCRIPT_DIR = Path(__file__).parent
PROJECT_ROOT = SCRIPT_DIR.parent.parent
CONTENT_DIR = PROJECT_ROOT / "content" / "posts"
DEFAULT_OUTPUT = PROJECT_ROOT / "gopher_build" / "blog"


def parse_frontmatter(content: str) -> tuple[dict, str]:
    """Parse YAML frontmatter and return (metadata, body)."""
    if not content.startswith("---"):
        return {}, content

    # Find the closing ---
    end_match = re.search(r"\n---\n", content[3:])
    if not end_match:
        return {}, content

    yaml_end = end_match.start() + 3
    yaml_content = content[3:yaml_end]
    body = content[yaml_end + 4 :]  # Skip past "\n---"; body keeps the newline after the closing delimiter

    try:
        metadata = yaml.safe_load(yaml_content)
    except yaml.YAMLError:
        metadata = {}

    return metadata or {}, body
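
# A quick sketch of the split this performs (hypothetical input, not taken
# from the repo): given "---\ntitle: Hi\n---\nBody", parse_frontmatter
# returns ({"title": "Hi"}, "\nBody") -- note the body keeps the newline
# that follows the closing delimiter.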


def extract_links(text: str) -> tuple[str, list]:
    """Extract markdown links and replace with numbered references."""
    links = []
    link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")

    def replace_link(match):
        label = match.group(1)
        url = match.group(2)
        links.append(url)
        return f"{label} [{len(links)}]"

    converted = link_pattern.sub(replace_link, text)
    return converted, links
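
# Sketch of the rewrite this produces (made-up example):
#   extract_links("see [my post](https://example.com) here")
#   -> ("see my post [1] here", ["https://example.com"])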


def convert_headings(text: str) -> str:
    """Convert markdown headings to gopher-style text."""

    # H1: Double line with centered text
    def h1_replace(match):
        title = match.group(1).strip()
        return get_section_header(title)

    # H2: Dashed subheading
    def h2_replace(match):
        title = match.group(1).strip()
        return get_subheading(title)

    # H3-H6: Just bold-style text
    def h3_replace(match):
        title = match.group(1).strip()
        return f"\n*{title}*\n"

    text = re.sub(r"^# (.+)$", h1_replace, text, flags=re.MULTILINE)
    text = re.sub(r"^## (.+)$", h2_replace, text, flags=re.MULTILINE)
    text = re.sub(r"^###+ (.+)$", h3_replace, text, flags=re.MULTILINE)

    return text
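
# Only the H3+ case is rendered inline above; H1/H2 delegate to the
# ascii_art helpers, whose exact banners live in that module. As a
# hypothetical example, a "### Notes" line becomes "\n*Notes*\n".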


def convert_formatting(text: str) -> str:
    """Convert markdown formatting to gopher-style text."""
    # Italic first: *text* -> _text_ (the lookarounds skip the doubled
    # asterisks of bold spans)
    text = re.sub(r"(?<!\*)\*([^*]+)\*(?!\*)", r"_\1_", text)

    # Bold: **text** -> *text*. This must run after the italic pass;
    # otherwise the single asterisks it produces would themselves be
    # re-matched as italics.
    text = re.sub(r"\*\*([^*]+)\*\*", r"*\1*", text)

    # Horizontal rules
    text = re.sub(r"^---+$", "-" * LINE_WIDTH, text, flags=re.MULTILINE)

    return text
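
# Ordering sketch (hypothetical input): in "**bold** and *ital*" the
# italic rule rewrites only "*ital*" (the doubled asterisks of the bold
# span fail its lookarounds), then the bold rule fires, giving
# "*bold* and _ital_".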


def convert_code_blocks(text: str) -> str:
    """Convert code blocks to indented text."""

    # Fenced code blocks
    def indent_code(match):
        code = match.group(2).rstrip("\n")  # drop the newline before the closing fence
        # Indent each line by 4 spaces
        indented = "\n".join("    " + line for line in code.split("\n"))
        return f"\n{indented}\n"

    text = re.sub(r"```(\w*)\n(.*?)```", indent_code, text, flags=re.DOTALL)

    # Inline code: `code` -> code (just remove backticks)
    text = re.sub(r"`([^`]+)`", r"\1", text)

    return text
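
# Sketch (made-up snippet): "```sh\necho hi\n```" is rewritten to
# "\n    echo hi\n", and an inline span like `ls -la` simply loses its
# backticks.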


def handle_imdbposter(text: str, metadata: dict, slug: str) -> str:
    """Replace imdbposter shortcode with ASCII movie table."""
    # Check if there's an imdbposter shortcode
    pattern = r"\{\{<\s*imdbposter\s*>\}\}(.*?)\{\{<\s*/imdbposter\s*>\}\}"
    match = re.search(pattern, text, flags=re.DOTALL)

    if not match:
        return text

    # Extract movie info from frontmatter
    title = metadata.get("title", "Unknown")
    year = metadata.get("year", "")
    director = metadata.get("director", "")
    runtime = metadata.get("runtime", 0)
    genres = metadata.get("genres", [])
    web_url = f"https://mnw.sdf.org/posts/{slug}/"

    # If year not in frontmatter, try to parse from date
    if not year and metadata.get("date"):
        date_str = str(metadata.get("date"))
        year_match = re.match(r"(\d{4})", date_str)
        if year_match:
            year = int(year_match.group(1))

    # Generate the ASCII table
    table = generate_movie_table(
        title=title,
        year=year,
        director=director,
        runtime=runtime,
        genres=genres,
        web_url=web_url,
    )

    # Also extract and preserve the viewing info table if present
    inner_content = match.group(1).strip()
    if inner_content:
        # Convert markdown table to plain text
        table_lines = []
        for line in inner_content.split("\n"):
            line = line.strip()
            if line and not line.startswith("|--"):
                # Remove leading/trailing pipes and clean up
                line = re.sub(r"^\||\|$", "", line)
                cells = [c.strip() for c in line.split("|")]
                if len(cells) >= 2:
                    table_lines.append(f"  {cells[0]}: {cells[1]}")
        if table_lines:
            table += "\n\n" + "\n".join(table_lines)

    return text[: match.start()] + table + text[match.end() :]
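
# Sketch of the inner-table handling (hypothetical shortcode body): a row
# such as "| Watched | 2026-01-01 |" survives as "  Watched: 2026-01-01",
# while "|---|---|" separator rows are dropped.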


def wrap_paragraphs(text: str) -> str:
    """Wrap text to LINE_WIDTH, preserving code blocks and structure."""
    lines = text.split("\n")
    result = []
    paragraph = []

    def flush_paragraph():
        if paragraph:
            para_text = " ".join(paragraph)
            wrapped = textwrap.fill(para_text, width=LINE_WIDTH)
            result.append(wrapped)
            paragraph.clear()

    for line in lines:
        # Detect code blocks (indented by 4 spaces)
        if line.startswith("    "):
            flush_paragraph()
            result.append(line)
            continue

        # Empty line = paragraph break
        if not line.strip():
            flush_paragraph()
            result.append("")
            continue

        # Lines that look like headers or dividers - don't wrap
        if (
            line.startswith("=")
            or line.startswith("-" * 10)
            or line.startswith("*")
            or line.startswith(" " * 10)
        ):
            flush_paragraph()
            result.append(line)
            continue

        # Accumulate paragraph text
        paragraph.append(line.strip())

    flush_paragraph()
    return "\n".join(result)


def convert_post(filepath: Path, output_dir: Path | None = None) -> Path | None:
    """Convert a single markdown post to gopher format."""
    content = filepath.read_text()
    metadata, body = parse_frontmatter(content)

    # Check if phlog is enabled
    if not metadata.get("phlog", False):
        return None

    # Check if draft
    if metadata.get("draft", False):
        return None

    # Determine output directory from series
    series = metadata.get("series", "Fun Center")
    gopher_dir = SERIES_TO_DIR.get(series, "fun-center")

    # Get slug from filename
    slug = filepath.stem

    # Output path
    output_dir = output_dir or DEFAULT_OUTPUT
    category_dir = output_dir / gopher_dir
    category_dir.mkdir(parents=True, exist_ok=True)
    output_path = category_dir / f"{slug}.txt"

    # Handle imdbposter shortcode first
    body = handle_imdbposter(body, metadata, slug)

    # Extract links before other conversions
    body, links = extract_links(body)

    # Convert markdown to gopher text
    body = convert_headings(body)
    body = convert_formatting(body)
    body = convert_code_blocks(body)

    # Wrap paragraphs
    body = wrap_paragraphs(body)

    # Build the final document
    date_str = ""
    if metadata.get("date"):
        date_obj = metadata["date"]
        if hasattr(date_obj, "strftime"):
            date_str = date_obj.strftime("%Y-%m-%d")
        else:
            date_str = str(date_obj)[:10]

    title = metadata.get("title", slug)
    summary = metadata.get("summary", "")

    parts = [
        POST_HEADER,
        get_post_meta_header(date_str, series),
        get_title_block(title, summary),
        "",
        "-" * LINE_WIDTH,
        "",
        body.strip(),
        "",
    ]

    if links:
        parts.append(format_links_section(links))
        parts.append("")

    parts.append(POST_FOOTER)
    parts.append("")
    parts.append(f"Web version: https://mnw.sdf.org/posts/{slug}/")

    output_content = "\n".join(parts)
    output_path.write_text(output_content)

    return output_path


def convert_all(output_dir: Path | None = None) -> list[Path]:
    """Convert all posts with phlog: true."""
    output_dir = output_dir or DEFAULT_OUTPUT
    converted = []

    for post_path in CONTENT_DIR.glob("*.md"):
        result = convert_post(post_path, output_dir)
        if result:
            converted.append(result)
            print(f"Converted: {post_path.name} -> {result.relative_to(output_dir)}")

    return converted


def main():
    parser = argparse.ArgumentParser(description="Convert Hugo posts to gopher format")
    parser.add_argument("file", nargs="?", help="Single file to convert")
    parser.add_argument("--all", action="store_true", help="Convert all phlog posts")
    parser.add_argument("--output", "-o", type=Path, help="Output directory")
    args = parser.parse_args()

    output = args.output or DEFAULT_OUTPUT

    if args.all:
        converted = convert_all(output)
        print(f"\nConverted {len(converted)} posts to {output}")
    elif args.file:
        filepath = Path(args.file)
        if not filepath.exists():
            print(f"File not found: {filepath}")
            return 1
        result = convert_post(filepath, output)
        if result:
            print(f"Converted: {result}")
        else:
            print("Post skipped (no phlog: true, or it is a draft)")
    else:
        parser.print_help()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
|