# marcus-web/scripts/import_letterboxd.py

#!/usr/bin/env python3
"""
Import movies from Letterboxd diary to Hugo draft posts.

Usage:
    python scripts/import_letterboxd.py           # Interactive mode - pick from recent
    python scripts/import_letterboxd.py --latest  # Import most recent entry
    python scripts/import_letterboxd.py --list    # Just list recent entries
    python scripts/import_letterboxd.py --theater # Skip to theater questions
    python scripts/import_letterboxd.py --home    # Skip to home video questions

The script will prompt for viewing details (theater vs home) and pre-fill
the front matter table accordingly.
"""

import argparse
import os
import re
import sys
import unicodedata
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import urlparse

import requests

# Configuration
# Letterboxd account name; interpolated into RSS_URL below.
LETTERBOXD_USER = "marcusEID"
# TMDB_API_KEY is read from a local config module (per the message below,
# scripts/config.py created by copying config.example.py). Abort with setup
# instructions instead of a traceback when the import fails.
# NOTE(review): `from config import TMDB_API_KEY` also raises ImportError when
# config.py exists but does not define TMDB_API_KEY; the message would then be
# misleading.
try:
    from config import TMDB_API_KEY
except ImportError:
    raise SystemExit("Error: scripts/config.py not found. Copy config.example.py to config.py and add your API key.")
RSS_URL = f"https://letterboxd.com/{LETTERBOXD_USER}/rss/"

# Paths (relative to script location)
SCRIPT_DIR = Path(__file__).parent  # directory containing this file
PROJECT_ROOT = SCRIPT_DIR.parent  # one level above the script directory
CONTENT_DIR = PROJECT_ROOT / "content" / "posts"  # where draft posts are written
IMAGES_DIR = PROJECT_ROOT / "static" / "images" / "posters"  # where posters are saved

# XML namespaces in Letterboxd RSS
# Prefix -> namespace URI map passed to Element.find() so that prefixed tag
# names such as "letterboxd:filmTitle" resolve.
NAMESPACES = {
    "letterboxd": "https://letterboxd.com",
    "tmdb": "https://themoviedb.org",
    "dc": "http://purl.org/dc/elements/1.1/",
}


def fetch_rss():
    """Download the Letterboxd RSS feed and return its parsed XML root.

    The request uses a 10 second timeout. A non-success HTTP status is turned
    into an exception by ``raise_for_status``; malformed XML makes
    ``ET.fromstring`` raise.
    """
    response = requests.get(RSS_URL, timeout=10)
    response.raise_for_status()
    feed_root = ET.fromstring(response.content)
    return feed_root


def parse_movies(root):
    """Extract movie entries from the RSS tree, skipping list entries.

    Args:
        root: Parsed XML root element of the Letterboxd feed.

    Returns:
        A list of dicts with the string keys ``tmdb_id``, ``title``, ``year``,
        ``rating``, ``watched_date``, ``rewatch`` and ``letterboxd_url``.
        Every value is a ``str``; a missing *or empty* element falls back to
        its default ("Unknown" for the title, "No" for rewatch, "" otherwise).
    """

    def text_of(item, tag, default=""):
        # An element that exists but has no content has ``.text is None``;
        # treat that the same as a missing element so callers never see None.
        node = item.find(tag, NAMESPACES)
        if node is None:
            return default
        return (node.text or "").strip() or default

    movies = []
    for item in root.findall(".//item"):
        # Lists (as opposed to films) don't carry a tmdb:movieId; an empty id
        # is equally unusable for the TMDB lookup, so skip both.
        tmdb_id = text_of(item, "tmdb:movieId")
        if not tmdb_id:
            continue

        movies.append({
            "tmdb_id": tmdb_id,
            "title": text_of(item, "letterboxd:filmTitle", "Unknown"),
            "year": text_of(item, "letterboxd:filmYear"),
            "rating": text_of(item, "letterboxd:memberRating"),
            "watched_date": text_of(item, "letterboxd:watchedDate"),
            "rewatch": text_of(item, "letterboxd:rewatch", "No"),
            "letterboxd_url": text_of(item, "link"),
        })

    return movies


def get_tmdb_details(tmdb_id):
    """Fetch movie details from TMDB including IMDB ID and poster.

    Args:
        tmdb_id: TMDB movie identifier, interpolated into the request URL.

    Returns:
        A dict with the keys ``imdb_id``, ``poster_path`` and ``overview``.
        Each value is a string; fields that are absent or JSON ``null`` in the
        response are returned as "".

    Raises:
        requests.HTTPError: if TMDB answers with a non-success status.
    """
    url = f"https://api.themoviedb.org/3/movie/{tmdb_id}"
    params = {"api_key": TMDB_API_KEY}
    resp = requests.get(url, params=params, timeout=10)
    resp.raise_for_status()
    data = resp.json()

    # dict.get(key, "") only covers a *missing* key; a JSON null comes back as
    # None and would later be rendered as the literal text "None" in the
    # draft's front matter. ``or ""`` normalizes both cases.
    return {
        "imdb_id": data.get("imdb_id") or "",
        "poster_path": data.get("poster_path") or "",
        "overview": data.get("overview") or "",
    }


def download_poster(poster_path, filename):
    """Save a TMDB poster under static/images/posters/ and return its site URL.

    Returns the site-relative URL ("/images/posters/<filename>") on success,
    or None (after printing a notice) when ``poster_path`` is empty.
    """
    if poster_path:
        # w500 is a good quality/size trade-off for the poster image.
        poster_source = f"https://image.tmdb.org/t/p/w500{poster_path}"
        response = requests.get(poster_source, timeout=10)
        response.raise_for_status()

        IMAGES_DIR.mkdir(parents=True, exist_ok=True)
        target = IMAGES_DIR / filename
        target.write_bytes(response.content)
        print(f"  Poster saved: {target.relative_to(PROJECT_ROOT)}")
        return f"/images/posters/{filename}"

    print("  No poster available")
    return None


def slugify(title):
    """Convert title to a lowercase, hyphen-separated, ASCII-only slug.

    Accented letters are folded to their ASCII base letter ("Amélie" ->
    "amelie") instead of being deleted. Whitespace, underscores and hyphens
    act as word separators and collapse into a single hyphen; every other
    character is dropped. Leading and trailing hyphens are stripped.

    Returns "" when the title contains no ASCII-representable letters or
    digits.
    """
    # NFKD splits "é" into "e" + a combining accent; the ASCII encode with
    # errors="ignore" then drops the accent and any other non-ASCII character.
    ascii_title = (
        unicodedata.normalize("NFKD", title)
        .encode("ascii", "ignore")
        .decode("ascii")
    )
    slug = ascii_title.lower()
    # Keep "_" here so the separator pass below can turn it into a hyphen.
    slug = re.sub(r"[^a-z0-9\s_-]", "", slug)
    slug = re.sub(r"[\s_-]+", "-", slug)
    return slug.strip("-")


def rating_to_stars(rating):
    """Convert a numeric rating to a star display such as "*** 1/2 (3.5)".

    Args:
        rating: Rating as text (e.g. "3.5"); may be empty or None.

    Returns:
        "" for an empty rating. For a numeric rating, one "*" per whole star,
        then "1/2" if the fractional part is at least 0.5, then the original
        rating in parentheses, all separated by single spaces. A rating that
        cannot be parsed as a number is returned unchanged (as text) rather
        than raising.
    """
    if not rating:
        return ""
    try:
        value = float(rating)
        full = int(value)  # raises for nan / inf
    except (ValueError, OverflowError):
        # Show the raw text instead of crashing on unexpected feed content.
        return str(rating)

    parts = []
    if full > 0:
        parts.append("*" * full)
    if value - full >= 0.5:
        parts.append("1/2")
    parts.append(f"({rating})")
    # Joining the parts avoids a stray leading space when there are no whole
    # stars (e.g. a 0.5 rating).
    return " ".join(parts)


def prompt_viewing_details():
    """Ask whether the film was seen in a theater or at home.

    Keeps asking until "1" or "2" is entered, then returns the result of the
    matching detail prompt.
    """
    for line in ("\nWhere did you watch this?", "  1. Theater", "  2. Home"):
        print(line)

    follow_ups = {"1": prompt_theater_details, "2": prompt_home_details}
    while True:
        follow_up = follow_ups.get(input("Enter 1 or 2: ").strip())
        if follow_up is not None:
            return follow_up()
        print("Please enter 1 or 2")


def prompt_theater_details():
    """Interactively collect the details of a theater viewing.

    Returns a dict with the keys ``type`` (always "theater"), ``theater``,
    ``theater_tag`` (None when "Other" was chosen), ``show_time``,
    ``theater_num``, ``pizza``, ``tickets`` and ``crew``.
    """
    print("\nWhich theater?")
    # Menu key -> (display name, tag). "Other" has no tag and asks for a name.
    menu = {
        "1": ("Gucci", "gucci"),
        "2": ("Ghost Theater", "ghost-theater"),
        "3": ("Marcel", "marcel"),
        "4": ("AMC South", "amc-south"),
        "5": ("AMC Lakeline", "amc-lakeline"),
        "6": ("Other", None),
    }
    for key, (label, _tag) in menu.items():
        print(f"  {key}. {label}")

    theater_name, theater_tag = "", None
    while not theater_name:
        selected = menu.get(input("Enter number: ").strip())
        if selected is not None:
            label, tag = selected
            if label == "Other":
                theater_name = input("Theater name: ").strip()
            else:
                theater_name, theater_tag = label, tag
        # Reached for an unknown number and for a blank "Other" name alike.
        if not theater_name:
            print("Please enter a valid number")

    # The prompts run in the order the dict entries are written.
    return {
        "type": "theater",
        "theater": theater_name,
        "theater_tag": theater_tag,
        "show_time": input("Show time (e.g. 7:30pm): ").strip(),
        "theater_num": input("Theater number: ").strip(),
        "pizza": input("Pizza? (Yes/No): ").strip(),
        "tickets": input("Tickets (e.g. 'At Box Office', 'A-List'): ").strip(),
        "crew": input("Crew (e.g. 'Me, Coach T, Science Bro'): ").strip(),
    }


def prompt_home_details():
    """Interactively collect the details of a home viewing.

    Returns a dict with the keys ``type`` (always "home"), ``theater``
    ("Home Video"), ``theater_tag`` ("homevideo"), ``show_time``,
    ``theater_num`` (holds the viewing location), ``pizza``, ``media`` and
    ``screen``.
    """

    def choose(heading, names):
        # Print a numbered menu and return the picked name; anything that is
        # not one of the listed numbers selects the first entry.
        print(heading)
        by_number = {}
        for number, name in enumerate(names, 1):
            by_number[str(number)] = name
            print(f"  {number}. {name}")
        reply = input("Enter number (default 1): ").strip()
        return by_number.get(reply, names[0])

    where = input("Location (e.g. 'Living Room', 'Woodrow Apt'): ").strip()
    if not where:
        where = "Home"
    when = input("Show time (optional, e.g. 'evening'): ").strip()
    had_pizza = input("Pizza? (Yes/No): ").strip()
    if not had_pizza:
        had_pizza = "No"

    # Media format
    media_format = choose("\nMedia format?", ["Online", "BluRay", "DVD", "VHS"])

    # Screen type
    screen_type = choose(
        "\nScreen?",
        ["4k TV", "4k Computer", "1080p Computer", "Cell Phone", "Someone Elses TV"],
    )

    return {
        "type": "home",
        "theater": "Home Video",
        "theater_tag": "homevideo",
        "show_time": when,
        "theater_num": where,
        "pizza": had_pizza,
        "media": media_format,
        "screen": screen_type,
    }


def create_draft_post(movie, tmdb_details, poster_url, viewing_details=None):
    """Create a Hugo draft post for the movie.

    Args:
        movie: Movie dict; the keys ``title``, ``watched_date`` and ``rating``
            are read.
        tmdb_details: Dict of TMDB data; only ``imdb_id`` is read.
        poster_url: Site-relative poster URL, or None/"" when there is none.
        viewing_details: Optional dict describing where the film was watched
            (keys ``type``, ``theater``, ``theater_tag``, ``show_time``,
            ``theater_num``, ``pizza`` plus ``media``/``screen`` for home or
            ``tickets``/``crew`` for theater). When omitted, the table is left
            blank and every known tag is listed for manual pruning.

    Returns:
        The path of the new Markdown file, or None if a post with the same
        slug already exists (nothing is written in that case).
    """
    slug = slugify(movie["title"])
    filename = f"{slug}.md"
    filepath = CONTENT_DIR / filename

    if filepath.exists():
        print(f"  Post already exists: {filepath.relative_to(PROJECT_ROOT)}")
        return None

    # Format the date for Hugo
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Format watched date nicely; fall back to the raw text if it is not
    # in YYYY-MM-DD form.
    watched = movie["watched_date"]
    if watched:
        try:
            dt = datetime.strptime(watched, "%Y-%m-%d")
            watched_display = dt.strftime("%B %d, %Y")
        except ValueError:
            watched_display = watched
    else:
        watched_display = ""

    # ``or ""`` also covers an explicit None, which would otherwise be
    # written into the front matter as the text "None".
    imdb_id = tmdb_details.get("imdb_id") or ""
    poster_value = poster_url or ""
    rating_display = rating_to_stars(movie["rating"])

    # The title sits inside a single-quoted YAML scalar, where a literal
    # apostrophe must be written as two apostrophes ("Ocean's" -> "Ocean''s").
    yaml_title = movie["title"].replace("'", "''")

    # Use viewing details if provided, otherwise use empty defaults
    if viewing_details:
        show_time = viewing_details.get("show_time", "")
        theater = viewing_details.get("theater", "")
        theater_num = viewing_details.get("theater_num", "")
        pizza = viewing_details.get("pizza", "")
        is_home = viewing_details.get("type") == "home"

        # Build tags based on viewing type
        tags = []
        if viewing_details.get("theater_tag"):
            tags.append(viewing_details["theater_tag"])
        tags.append("no-expectations")
        if pizza.lower() == "yes":
            tags.append("had pizza")

        # The two variable table rows differ for home vs theater
        if is_home:
            row5_label = "Media"
            row5_value = viewing_details.get("media", "")
            row7_label = "Screen"
            row7_value = viewing_details.get("screen", "")
        else:
            row5_label = "Tickets"
            row5_value = viewing_details.get("tickets", "")
            row7_label = "Crew"
            row7_value = viewing_details.get("crew", "")
    else:
        show_time = ""
        theater = ""
        theater_num = ""
        pizza = ""
        row5_label = "Tickets"
        row5_value = ""
        row7_label = "Crew"
        row7_value = ""
        # No details given: list every tag so the author can delete the
        # ones that don't apply.
        tags = [
            "gucci",
            "ghost-theater",
            "marcel",
            "amc-south",
            "amc-lakeline",
            "anticipated",
            "no-expectations",
            "had pizza",
        ]
    tags_yaml = "\n".join(f"  - {tag}" for tag in tags)

    # Build the frontmatter and content. Quadruple braces render as the
    # literal "{{" / "}}" of the Hugo shortcode.
    content = f'''---
title: '{yaml_title}'
date: {now}
draft: true
series: "Frank's Couch"
summary: ""
imdb: "{imdb_id}"
poster: "{poster_value}"
tags:
{tags_yaml}
# Mastodon comments: After posting about this on Mastodon, add the post ID below.
# Get the ID from the end of the toot URL, e.g. https://tilde.zone/@mnw/123456789
# mastodon_id: ""
# To block a reply from showing, add its full URL to this list:
# mastodon_blocked:
#   - "https://tilde.zone/@someone/123456789"
---
{{{{< imdbposter >}}}}

| Date watched        | {watched_display:<17} |
|---------------------|-------------------|
| Show Time           | {show_time:<17} |
| Theater             | {theater:<17} |
| Theater Number      | {theater_num:<17} |
| Pizza               | {pizza:<17} |
| {row5_label:<19} | {row5_value:<17} |
| Letterboxd Rating   | {rating_display:<17} |
| {row7_label:<19} | {row7_value:<17} |

{{{{< /imdbposter >}}}}

Write your review here...

'''

    # Create the posts directory on first use (mirrors download_poster) and
    # write UTF-8 explicitly so non-ASCII titles don't depend on the platform's
    # default encoding.
    CONTENT_DIR.mkdir(parents=True, exist_ok=True)
    filepath.write_text(content, encoding="utf-8")
    print(f"  Draft created: {filepath.relative_to(PROJECT_ROOT)}")
    return filepath


def display_movies(movies, limit=10):
    """Print a numbered list of the first ``limit`` movies.

    Each entry shows title, year, the rating (when present), a rewatch marker
    and, on a second line, the watched date.
    """
    print(f"\nRecent movies from Letterboxd ({LETTERBOXD_USER}):\n")
    for position, movie in enumerate(movies[:limit], start=1):
        rewatch_suffix = ""
        if movie["rewatch"] == "Yes":
            rewatch_suffix = " (rewatch)"
        rating_suffix = ""
        if movie["rating"]:
            rating_suffix = f" - {movie['rating']}*"
        print(f"  {position}. {movie['title']} ({movie['year']}){rating_suffix}{rewatch_suffix}")
        print(f"     Watched: {movie['watched_date']}")
    print()


def import_movie(movie, viewing_mode=None):
    """Run the full import for one movie and return the draft's path.

    Steps, in order: ask for viewing details, fetch TMDB details, download
    the poster (if TMDB reports one), create the draft post.

    Args:
        movie: Movie data from Letterboxd RSS
        viewing_mode: 'theater', 'home', or None (will prompt)

    Returns:
        Whatever ``create_draft_post`` returns: the draft path, or None when
        no draft was created.
    """
    print(f"\nImporting: {movie['title']} ({movie['year']})")

    # A known mode jumps straight to its questions; anything else asks first.
    prompt_for_mode = {
        "theater": prompt_theater_details,
        "home": prompt_home_details,
    }
    viewing_details = prompt_for_mode.get(viewing_mode, prompt_viewing_details)()

    print("\n  Fetching TMDB details...")
    tmdb = get_tmdb_details(movie["tmdb_id"])

    poster_url = None
    poster_path = tmdb["poster_path"]
    if poster_path:
        print("  Downloading poster...")
        poster_url = download_poster(poster_path, f"{slugify(movie['title'])}.jpg")

    print("  Creating draft post...")
    draft_path = create_draft_post(movie, tmdb, poster_url, viewing_details)
    if not draft_path:
        return draft_path

    print(f"\nDone! Edit your draft at: {draft_path.relative_to(PROJECT_ROOT)}")
    imdb_id = tmdb.get("imdb_id")
    if imdb_id:
        print(f"IMDB: https://www.imdb.com/title/{imdb_id}/")
    return draft_path


def main():
    """Command-line entry point.

    Parses the flags, fetches the feed, then either lists recent entries
    (``--list``), imports the newest one (``--latest``) or asks which of the
    listed entries to import. Always terminates via ``sys.exit``: status 0
    after listing, after an import call returns, or on 'q'; status 1 on a
    fetch error, an empty feed, an invalid selection or cancellation.
    """
    parser = argparse.ArgumentParser(description="Import Letterboxd movies to Hugo")
    parser.add_argument("--latest", action="store_true", help="Import most recent entry")
    parser.add_argument("--list", action="store_true", help="Just list recent entries")
    parser.add_argument("--count", type=int, default=10, help="Number of entries to show")
    parser.add_argument("--theater", action="store_true", help="Skip viewing prompt, go straight to theater questions")
    parser.add_argument("--home", action="store_true", help="Skip viewing prompt, go straight to home questions")
    args = parser.parse_args()

    # Determine viewing mode from flags (--theater wins if both are given)
    viewing_mode = None
    if args.theater:
        viewing_mode = "theater"
    elif args.home:
        viewing_mode = "home"

    def run_import(movie):
        # Ctrl-C / end-of-input while answering the prompts is a cancellation.
        # Other errors are deliberately NOT caught here so a real failure is
        # never reported as "Cancelled".
        try:
            import_movie(movie, viewing_mode)
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled")
            sys.exit(1)

    print("Fetching Letterboxd RSS feed...")
    try:
        root = fetch_rss()
    except Exception as e:  # top-level boundary: report and exit
        print(f"Error fetching RSS: {e}")
        sys.exit(1)

    movies = parse_movies(root)
    if not movies:
        print("No movies found in feed.")
        sys.exit(1)

    if args.list:
        display_movies(movies, args.count)
        sys.exit(0)

    if args.latest:
        run_import(movies[0])
        sys.exit(0)

    # Interactive mode
    display_movies(movies, args.count)

    try:
        choice = input("Enter number to import (or 'q' to quit): ").strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled")
        sys.exit(1)

    if choice.lower() == "q":
        sys.exit(0)

    # Only the int() conversion is guarded: a ValueError raised later, during
    # the import itself, must surface instead of being mistaken for bad input.
    try:
        idx = int(choice) - 1
    except ValueError:
        print("Invalid selection")
        sys.exit(1)

    if not 0 <= idx < len(movies):
        print("Invalid selection")
        sys.exit(1)

    run_import(movies[idx])


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()