marcus-web/scripts/gopher/convert_to_gopher.py
Marcus b7ac21093a Fix import path issue in gopher scripts
Add script directory to sys.path so imports work when scripts are
called from remote_publish.sh. Also remove unused variable and
no-op regex.
2026-01-14 16:24:42 -06:00

#!/usr/bin/env python3
"""
Convert Hugo markdown posts to gopher-formatted text files.
Usage:
python scripts/gopher/convert_to_gopher.py content/posts/blog-posting.md
python scripts/gopher/convert_to_gopher.py --all
python scripts/gopher/convert_to_gopher.py --all --output gopher_build/blog/
"""
import argparse
import re
import sys
import textwrap
import yaml
from pathlib import Path
# Add script directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from ascii_art import (
LINE_WIDTH,
POST_HEADER,
POST_FOOTER,
SERIES_TO_DIR,
get_post_meta_header,
get_title_block,
get_section_header,
get_subheading,
generate_movie_table,
format_links_section,
)
# Paths
SCRIPT_DIR = Path(__file__).parent
PROJECT_ROOT = SCRIPT_DIR.parent.parent
CONTENT_DIR = PROJECT_ROOT / "content" / "posts"
DEFAULT_OUTPUT = PROJECT_ROOT / "gopher_build" / "blog"
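# A post handled by this script is assumed to follow the usual Hugo layout,
# roughly (the field names shown are the ones read further down, e.g. "phlog",
# "draft", "series"):
#
#   ---
#   title: "Some Post"
#   date: 2026-01-01
#   series: Fun Center
#   phlog: true
#   ---
#   Markdown body...
#
# parse_frontmatter() splits that into a (metadata dict, body text) pair.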
def parse_frontmatter(content: str) -> tuple[dict, str]:
"""Parse YAML frontmatter and return (metadata, body)."""
if not content.startswith("---"):
return {}, content
# Find the closing ---
end_match = re.search(r"\n---\n", content[3:])
if not end_match:
return {}, content
yaml_end = end_match.start() + 3
yaml_content = content[3:yaml_end]
    body = content[yaml_end + 5 :]  # Skip the closing "\n---\n" delimiter
try:
metadata = yaml.safe_load(yaml_content)
except yaml.YAMLError:
metadata = {}
return metadata or {}, body
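# Example: "see [my post](https://example.org/x)" becomes "see my post [1]",
# with "https://example.org/x" (an illustrative URL) appended to the returned
# list, which later feeds format_links_section().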
def extract_links(text: str) -> tuple[str, list]:
"""Extract markdown links and replace with numbered references."""
links = []
link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
def replace_link(match):
text = match.group(1)
url = match.group(2)
links.append(url)
return f"{text} [{len(links)}]"
converted = link_pattern.sub(replace_link, text)
return converted, links
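# Heading mapping (the exact look of the first two comes from ascii_art):
#   "# Title"   -> get_section_header("Title")  (double line, centered text)
#   "## Title"  -> get_subheading("Title")      (dashed subheading)
#   "### Title" -> "*Title*"                    (and deeper levels likewise)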
def convert_headings(text: str) -> str:
"""Convert markdown headings to gopher-style text."""
# H1: Double line with centered text
def h1_replace(match):
title = match.group(1).strip()
return get_section_header(title)
# H2: Dashed subheading
def h2_replace(match):
title = match.group(1).strip()
return get_subheading(title)
# H3-H6: Just bold-style text
def h3_replace(match):
title = match.group(1).strip()
return f"\n*{title}*\n"
text = re.sub(r"^# (.+)$", h1_replace, text, flags=re.MULTILINE)
text = re.sub(r"^## (.+)$", h2_replace, text, flags=re.MULTILINE)
text = re.sub(r"^###+ (.+)$", h3_replace, text, flags=re.MULTILINE)
return text
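# Emphasis mapping: "**bold**" -> "*bold*", "*italic*" -> "_italic_", and a
# line consisting only of three or more dashes becomes a full-width rule.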
def convert_formatting(text: str) -> str:
"""Convert markdown formatting to gopher-style text."""
    # Italic first: *text* -> _text_ (done before bold so that bold markers,
    # once rewritten to single asterisks below, are not re-matched as italics)
    text = re.sub(r"(?<!\*)\*([^*]+)\*(?!\*)", r"_\1_", text)
    # Bold: **text** -> *text*
    text = re.sub(r"\*\*([^*]+)\*\*", r"*\1*", text)
# Horizontal rules
text = re.sub(r"^---+$", "-" * LINE_WIDTH, text, flags=re.MULTILINE)
return text
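# Example: a fenced block such as
#
#   ```python
#   print("hi")
#   ```
#
# comes out as the same lines indented by four spaces, and `inline code`
# simply loses its backticks.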
def convert_code_blocks(text: str) -> str:
"""Convert code blocks to indented text."""
# Fenced code blocks
def indent_code(match):
code = match.group(2)
# Indent each line by 4 spaces
        indented = "\n".join("    " + line for line in code.split("\n"))
return f"\n{indented}\n"
text = re.sub(r"```(\w*)\n(.*?)```", indent_code, text, flags=re.DOTALL)
# Inline code: `code` -> code (just remove backticks)
text = re.sub(r"`([^`]+)`", r"\1", text)
return text
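# Posts can wrap an optional viewing-info markdown table in an imdbposter
# shortcode; the shape assumed here is roughly:
#
#   {{< imdbposter >}}
#   | Watched on | 2026-01-01 |
#   | Format     | Blu-ray    |
#   {{< /imdbposter >}}
#
# The shortcode is replaced by an ASCII movie table built from frontmatter
# (title, year, director, runtime, genres) plus a link to the web version,
# with any table rows re-emitted as indented "key: value" lines.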
def handle_imdbposter(text: str, metadata: dict, slug: str) -> str:
"""Replace imdbposter shortcode with ASCII movie table."""
# Check if there's an imdbposter shortcode
pattern = r"\{\{<\s*imdbposter\s*>\}\}(.*?)\{\{<\s*/imdbposter\s*>\}\}"
match = re.search(pattern, text, flags=re.DOTALL)
if not match:
return text
# Extract movie info from frontmatter
title = metadata.get("title", "Unknown")
year = metadata.get("year", "")
director = metadata.get("director", "")
runtime = metadata.get("runtime", 0)
genres = metadata.get("genres", [])
web_url = f"https://mnw.sdf.org/posts/{slug}/"
# If year not in frontmatter, try to parse from date
if not year and metadata.get("date"):
date_str = str(metadata.get("date"))
year_match = re.match(r"(\d{4})", date_str)
if year_match:
year = int(year_match.group(1))
# Generate the ASCII table
table = generate_movie_table(
title=title,
year=year,
director=director,
runtime=runtime,
genres=genres,
web_url=web_url,
)
# Also extract and preserve the viewing info table if present
inner_content = match.group(1).strip()
if inner_content:
# Convert markdown table to plain text
table_lines = []
for line in inner_content.split("\n"):
line = line.strip()
if line and not line.startswith("|--"):
# Remove leading/trailing pipes and clean up
line = re.sub(r"^\||\|$", "", line)
cells = [c.strip() for c in line.split("|")]
if len(cells) >= 2:
table_lines.append(f" {cells[0]}: {cells[1]}")
if table_lines:
table += "\n\n" + "\n".join(table_lines)
return text[: match.start()] + table + text[match.end() :]
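# Re-wrapping rules: indented code lines, blank lines, and anything that looks
# like a header or divider pass through untouched; runs of ordinary prose
# lines are joined into a paragraph and re-wrapped to LINE_WIDTH.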
def wrap_paragraphs(text: str) -> str:
"""Wrap text to LINE_WIDTH, preserving code blocks and structure."""
lines = text.split("\n")
result = []
paragraph = []
def flush_paragraph():
if paragraph:
para_text = " ".join(paragraph)
wrapped = textwrap.fill(para_text, width=LINE_WIDTH)
result.append(wrapped)
paragraph.clear()
for line in lines:
# Detect code blocks (indented by 4 spaces)
        if line.startswith("    "):
flush_paragraph()
result.append(line)
continue
# Empty line = paragraph break
if not line.strip():
flush_paragraph()
result.append("")
continue
# Lines that look like headers or dividers - don't wrap
if (
line.startswith("=")
or line.startswith("-" * 10)
or line.startswith("*")
or line.startswith(" " * 10)
):
flush_paragraph()
result.append(line)
continue
# Accumulate paragraph text
paragraph.append(line.strip())
flush_paragraph()
return "\n".join(result)
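# Conversion pipeline for one post (order matters: the shortcode and links are
# rewritten before heading/emphasis/code conversion, and wrapping runs last so
# the already-converted text is what gets fitted to LINE_WIDTH):
#
#   frontmatter -> imdbposter -> links -> headings -> formatting ->
#   code blocks -> paragraph wrapping -> header + body + links + footer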
def convert_post(filepath: Path, output_dir: Path | None = None) -> Path | None:
"""Convert a single markdown post to gopher format."""
content = filepath.read_text()
metadata, body = parse_frontmatter(content)
# Check if phlog is enabled
if not metadata.get("phlog", False):
return None
# Check if draft
if metadata.get("draft", False):
return None
# Determine output directory from series
series = metadata.get("series", "Fun Center")
gopher_dir = SERIES_TO_DIR.get(series, "fun-center")
# Get slug from filename
slug = filepath.stem
# Output path
output_dir = output_dir or DEFAULT_OUTPUT
category_dir = output_dir / gopher_dir
category_dir.mkdir(parents=True, exist_ok=True)
output_path = category_dir / f"{slug}.txt"
# Handle imdbposter shortcode first
body = handle_imdbposter(body, metadata, slug)
# Extract links before other conversions
body, links = extract_links(body)
# Convert markdown to gopher text
body = convert_headings(body)
body = convert_formatting(body)
body = convert_code_blocks(body)
# Wrap paragraphs
body = wrap_paragraphs(body)
# Build the final document
date_str = ""
if metadata.get("date"):
date_obj = metadata["date"]
if hasattr(date_obj, "strftime"):
date_str = date_obj.strftime("%Y-%m-%d")
else:
date_str = str(date_obj)[:10]
title = metadata.get("title", slug)
summary = metadata.get("summary", "")
parts = [
POST_HEADER,
get_post_meta_header(date_str, series),
get_title_block(title, summary),
"",
"-" * LINE_WIDTH,
"",
body.strip(),
"",
]
if links:
parts.append(format_links_section(links))
parts.append("")
parts.append(POST_FOOTER)
parts.append("")
parts.append(f"Web version: https://mnw.sdf.org/posts/{slug}/")
output_content = "\n".join(parts)
output_path.write_text(output_content)
return output_path
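# convert_all() only looks at *.md files directly under content/posts/ (no
# recursion); posts without "phlog: true", or marked as drafts, are skipped
# inside convert_post() and simply return None.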
def convert_all(output_dir: Path | None = None) -> list[Path]:
"""Convert all posts with phlog: true."""
output_dir = output_dir or DEFAULT_OUTPUT
converted = []
for post_path in CONTENT_DIR.glob("*.md"):
result = convert_post(post_path, output_dir)
if result:
converted.append(result)
print(f"Converted: {post_path.name} -> {result.relative_to(output_dir)}")
return converted
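# Exit status: 0 on success, 1 when the given file does not exist or when no
# file/--all argument was supplied.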
def main():
parser = argparse.ArgumentParser(description="Convert Hugo posts to gopher format")
parser.add_argument("file", nargs="?", help="Single file to convert")
parser.add_argument("--all", action="store_true", help="Convert all phlog posts")
parser.add_argument("--output", "-o", type=Path, help="Output directory")
args = parser.parse_args()
output = args.output or DEFAULT_OUTPUT
if args.all:
converted = convert_all(output)
print(f"\nConverted {len(converted)} posts to {output}")
elif args.file:
filepath = Path(args.file)
if not filepath.exists():
print(f"File not found: {filepath}")
return 1
result = convert_post(filepath, output)
if result:
print(f"Converted: {result}")
else:
print("Post skipped (no phlog: true or is draft)")
else:
parser.print_help()
return 1
return 0
if __name__ == "__main__":
    sys.exit(main())