marcus-web/scripts/new_nfr.py

#!/usr/bin/env python3
"""
Create a new post for National Film Registry movies in the "Found in the Darkroom" series.

Usage:
    python scripts/new_nfr.py tt1234567              # From IMDB ID
    python scripts/new_nfr.py "Movie Title"          # From title (searches TMDB)
    python scripts/new_nfr.py --list-2024            # Show 2024 NFR list
    python scripts/new_nfr.py tt1234567 --nfr-year 2023   # Override NFR induction year (default: 2024)

The script will:
1. Fetch movie data from TMDB (poster, year, director, runtime, genres)
2. Download the poster
3. Create a draft post using the darkroom archetype
4. Pre-fill metadata including NFR year
"""

import argparse
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path

import requests

# Configuration
try:
    from config import TMDB_API_KEY
except ImportError:
    raise SystemExit("Error: scripts/config.py not found. Copy config.example.py to config.py and add your API key.")

# Paths, anchored to this script's resolved location so they do not depend on
# the current working directory or on how the script was invoked (a relative
# __file__ would otherwise make PROJECT_ROOT collapse to the wrong directory).
SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = SCRIPT_DIR.parent
CONTENT_DIR = PROJECT_ROOT / "content" / "posts"  # draft posts are written here
IMAGES_DIR = PROJECT_ROOT / "static" / "images" / "posters"  # downloaded posters

# 2024 National Film Registry inductees with LOC descriptions
# Source: https://newsroom.loc.gov/news/25-films-named-to-national-film-registry-for-preservation/
#
# Maps each film title to a dict with two keys:
#   "year":        shown by --list-2024 and passed to the TMDB title search as a year hint
#   "description": text inserted under "Why It's in the National Film Registry" in new posts
# Titles are compared case-insensitively against the user's input / the TMDB title.
NFR_2024 = {
    "Annabelle Serpentine Dance": {
        "year": 1895,
        "description": 'Preserved as a foundational cinema work that "enticed and enchanted audiences" during film\'s infancy, demonstrating early technical innovations like hand-tinted color.'
    },
    "Koko's Earth Control": {
        "year": 1928,
        "description": "Selected for representing the Fleischer Studios' competitive animation style against Disney, featuring innovative techniques like rotoscoping that advanced the medium."
    },
    "Angels with Dirty Faces": {
        "year": 1938,
        "description": 'Recognized for depicting "Depression-era immigrant, segregated, hardscrabble neighborhoods" while navigating Production Code restrictions through redemptive storytelling.'
    },
    "The Pride of the Yankees": {
        "year": 1942,
        "description": "Honored as one of cinema's seminal sports films, featuring authentic appearances by former Yankees teammates and Lou Gehrig's iconic farewell speech recreation."
    },
    "Invaders from Mars": {
        "year": 1953,
        "description": 'Selected for establishing "the visual language of science fiction cinema" and influencing subsequent sci-fi works through post-war paranoia themes.'
    },
    "The Miracle Worker": {
        "year": 1962,
        "description": 'Preserved for Arthur Penn\'s "stark black and white" presentation of Helen Keller\'s story, told with minimal sentimentality to highlight human potential.'
    },
    "The Chelsea Girls": {
        "year": 1966,
        "description": 'Recognized as a Warhol experimental work that challenged narrative form through dual-projection and "infinite audience interpretations."'
    },
    "Ganja and Hess": {
        "year": 1973,
        "description": 'Honored for addressing "complexities of addiction, sexuality and Black identity" through Bill Gunn\'s visionary filmmaking that remained underrecognized.'
    },
    "The Texas Chain Saw Massacre": {
        "year": 1974,
        "description": 'Selected for establishing "tenets of the gore/slasher/splatter genre" despite initial controversy, becoming a "cultural and filmmaking touchstone."'
    },
    "Uptown Saturday Night": {
        "year": 1974,
        "description": 'Preserved as Sidney Poitier\'s directorial effort "dispelling stereotypes" of the Blaxploitation era through an entertaining crime comedy ensemble cast.'
    },
    "Zora Lathan Student Films": {
        "year": 1975,
        "description": "Six short films recognized for showcasing filmmaking techniques and design problem-solving approaches, documenting intimate domestic moments from early 1980s perspectives."
    },
    "Up in Smoke": {
        "year": 1978,
        "description": 'Selected for arguably establishing the "stoner" film genre and paving "the way for subsequent memorable movie characters" through comic improvisation.'
    },
    "Will": {
        "year": 1981,
        "description": 'Honored as "the first independent feature-length film directed by a Black woman," documenting early 1980s Harlem while addressing addiction and resilience themes.'
    },
    "Star Trek: The Wrath of Khan": {
        "year": 1982,
        "description": 'Preserved as "often considered the best of the six original-cast Star Trek theatrical films," featuring expert direction and exploration of sacrifice.'
    },
    "Beverly Hills Cop": {
        "year": 1984,
        "description": 'Recognized as "Eddie Murphy\'s first feature film on the registry" and establishing his "box-office superstar" status through this buddy-cop action-comedy.'
    },
    "Dirty Dancing": {
        "year": 1987,
        "description": 'Selected for remaining "influential and imitated" despite addressing serious themes including pregnancy, abortion, and breaking class barriers through dance.'
    },
    "Common Threads: Stories from the Quilt": {
        "year": 1989,
        "description": 'Honored as an Oscar-winning documentary serving as "a monument to the power of grief and activism" chronicling the AIDS Memorial Quilt\'s creation.'
    },
    "Powwow Highway": {
        "year": 1989,
        "description": 'Preserved as "one of the first" films treating "Native Americans as ordinary people," departing from Hollywood stereotypes through a witty buddy road narrative.'
    },
    "My Own Private Idaho": {
        "year": 1991,
        "description": 'Recognized for Gus Van Sant\'s "magnificently original cult classic" reimagining Shakespeare through street hustlers\' journeys with "dream-like vision and hardcore reality."'
    },
    "American Me": {
        "year": 1992,
        "description": 'Selected for Edward James Olmos\'s directorial debut depicting "dark, brutal realities of Chicano gang life" addressing prison drug trafficking with unflinching honesty.'
    },
    "Mi Familia": {
        "year": 1995,
        "description": 'Preserved as Gregory Nava\'s "emotional and evocative" multi-generational Mexican-American family story celebrating immigration\'s role in American vitality.'
    },
    "Compensation": {
        "year": 1999,
        "description": 'Honored for director Zeinabu irene Davis\'s innovative accessibility approach incorporating "American Sign Language and title cards" for deaf and hearing audiences.'
    },
    "Spy Kids": {
        "year": 2001,
        "description": 'Selected for Robert Rodriguez\'s incorporation of "Hispanic culture" through family-centered storytelling emphasizing "familial bonds and cultural heritage" authenticity.'
    },
    "No Country for Old Men": {
        "year": 2007,
        "description": 'Preserved as a Coen Brothers modern-day Western adaptation "hailed as a classic nearly from the moment of release," winning Best Picture Oscar recognition.'
    },
    "The Social Network": {
        "year": 2010,
        "description": 'Recognized for transforming a potentially "dry, geeky" corporate narrative into "a riveting examination" of modern business ethics and technology\'s societal impact.'
    },
}


def slugify(title):
    """Turn a title into a lowercase, hyphen-separated slug.

    The result is used for both the post filename and the poster filename.
    """
    # Applied in order: drop everything except a-z, 0-9, whitespace and
    # hyphens; turn whitespace runs into one hyphen; squeeze repeated hyphens.
    substitutions = (
        (r"[^a-z0-9\s-]", ""),
        (r"[\s_]+", "-"),
        (r"-+", "-"),
    )
    result = title.lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result.strip("-")


def search_tmdb_by_title(title, year=None):
    """Query TMDB's movie search and return the top hit.

    Args:
        title: Text to search for.
        year: Optional year sent along as the ``year`` query parameter.

    Returns:
        The first entry of the response's ``results`` list, or None when the
        response has no results.

    Raises:
        requests.HTTPError: If TMDB answers with an error status.
    """
    query = {"api_key": TMDB_API_KEY, "query": title}
    if year:
        query["year"] = year

    response = requests.get(
        "https://api.themoviedb.org/3/search/movie",
        params=query,
        timeout=10,
    )
    response.raise_for_status()

    matches = response.json().get("results")
    # Only the top-ranked hit is used.
    return matches[0] if matches else None


def get_tmdb_details(tmdb_id):
    """Return the decoded JSON of TMDB's movie-details endpoint for ``tmdb_id``.

    Raises:
        requests.HTTPError: If TMDB answers with an error status.
    """
    response = requests.get(
        f"https://api.themoviedb.org/3/movie/{tmdb_id}",
        params={"api_key": TMDB_API_KEY},
        timeout=10,
    )
    response.raise_for_status()
    return response.json()


def get_imdb_id_from_tmdb(tmdb_id):
    """Get the IMDB ID for a TMDB movie ID.

    Args:
        tmdb_id: TMDB movie identifier.

    Returns:
        The IMDB ID string, or "" when TMDB has none on record.
    """
    data = get_tmdb_details(tmdb_id)
    # TMDB sends an explicit null when no IMDB ID is known; dict.get's default
    # only covers a *missing* key, so coerce None to "" as well.
    return data.get("imdb_id") or ""


def get_tmdb_id_from_imdb(imdb_id):
    """Look up the TMDB movie ID that corresponds to an IMDB ID.

    Returns:
        The ``id`` of the first entry in the response's ``movie_results``.

    Raises:
        requests.HTTPError: If TMDB answers with an error status.
        ValueError: If the response lists no movie for ``imdb_id``.
    """
    response = requests.get(
        f"https://api.themoviedb.org/3/find/{imdb_id}",
        params={"api_key": TMDB_API_KEY, "external_source": "imdb_id"},
        timeout=10,
    )
    response.raise_for_status()

    movies = response.json().get("movie_results", [])
    if movies:
        return movies[0]["id"]
    raise ValueError(f"No TMDB match found for IMDB ID: {imdb_id}")


def download_poster(poster_path, filename):
    """Fetch a TMDB poster and store it under static/images/posters/.

    Args:
        poster_path: TMDB poster path, appended to the image base URL.
        filename: Name to save the image under inside IMAGES_DIR.

    Returns:
        The site-relative URL ("/images/posters/<filename>"), or None when
        ``poster_path`` is empty.

    Raises:
        requests.HTTPError: If the image request returns an error status.
    """
    if not poster_path:
        print("  No poster available")
        return None

    # w500 is large enough to look good without being a huge download.
    image_url = f"https://image.tmdb.org/t/p/w500{poster_path}"
    response = requests.get(image_url, timeout=10)
    response.raise_for_status()

    IMAGES_DIR.mkdir(parents=True, exist_ok=True)
    target = IMAGES_DIR / filename
    target.write_bytes(response.content)

    saved_as = target.relative_to(PROJECT_ROOT)
    print(f"  Poster saved: {saved_as}")
    return f"/images/posters/{filename}"


def extract_imdb_id(input_str):
    """Pull an IMDB ID ("tt" followed by digits) out of a raw ID or a URL.

    Returns:
        ``input_str`` itself when it is already a bare ID, otherwise the first
        "tt<digits>" run found inside it, or None when there is none.
    """
    # Bare ID: hand the input back untouched.
    if re.match(r'^tt\d+$', input_str) is not None:
        return input_str

    # Otherwise look for an ID embedded in a longer string such as a URL.
    found = re.search(r'(tt\d+)', input_str)
    return found.group(1) if found else None


def format_director(directors):
    """Format director name(s) as a YAML value for the post front matter.

    Args:
        directors: Sequence of director names; may be empty or None.

    Returns:
        '""' for no directors, a double-quoted scalar for a single director,
        or a flow sequence of double-quoted scalars for several.
    """
    if not directors:
        return '""'

    # Inside a YAML double-quoted scalar, backslash and double quote must be
    # escaped; otherwise a name such as 'William "Bill" Gunn' breaks the
    # front matter. Backslashes are escaped first so the ones added for the
    # quotes are not doubled again.
    quoted = [
        '"' + name.replace("\\", "\\\\").replace('"', '\\"') + '"'
        for name in directors
    ]
    if len(quoted) == 1:
        return quoted[0]
    # Multiple directors - use YAML list (flow sequence) format
    return "[" + ", ".join(quoted) + "]"


def create_nfr_post(tmdb_data, imdb_id, nfr_year=2024):
    """Create a draft post for an NFR movie.

    Downloads the poster (when TMDB provides one), builds the front matter and
    body, and writes the draft to CONTENT_DIR. If a post with the same slug
    already exists the user is asked on stdin whether to overwrite it.

    Args:
        tmdb_data: Movie-details dict from TMDB (reads "title",
            "release_date", "runtime", "overview", "genres", "poster_path").
        imdb_id: IMDB ID for the front matter; None or "" when unknown.
        nfr_year: Year the film was added to the National Film Registry. The
            built-in LOC descriptions are only consulted for 2024.

    Returns:
        Path of the written draft, or None if an existing post was kept.
    """
    # TMDB reports unknown values as null, so normalise None to "" up front;
    # otherwise the literal text "None" ends up in the front matter.
    imdb_id = imdb_id or ""
    title = tmdb_data.get("title") or "Unknown"

    # A title without any ASCII letters or digits slugifies to "", which would
    # produce a file literally named ".md"; fall back to the IMDB ID.
    slug = slugify(title) or imdb_id or "untitled"
    filepath = CONTENT_DIR / f"{slug}.md"

    if filepath.exists():
        print(f"  Post already exists: {filepath.relative_to(PROJECT_ROOT)}")
        overwrite = input("  Overwrite? (y/N): ").strip().lower()
        if overwrite != 'y':
            return None

    # Format the date for Hugo
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Extract metadata
    year = (tmdb_data.get("release_date") or "")[:4]
    runtime = tmdb_data.get("runtime")
    if runtime is None:
        runtime = ""
    overview = tmdb_data.get("overview") or ""

    # Note: director is left blank because full crew info requires a second
    # API call; the "Next steps" output points at the script that fills it in.

    # Genres
    genres = [g["name"] for g in tmdb_data.get("genres") or []]
    genres_yaml = "[" + ", ".join(genres) + "]" if genres else "[]"

    # Poster
    poster_url = ""
    if tmdb_data.get("poster_path"):
        print("  Downloading poster...")
        poster_url = download_poster(tmdb_data["poster_path"], f"{slug}.jpg") or ""

    # Look up LOC description if this is a 2024 NFR film. An exact
    # (case-insensitive) title match wins; the looser substring match is only
    # a fallback, so short list titles such as "Will" do not shadow the
    # intended entry.
    loc_description = ""
    if nfr_year == 2024:
        wanted = title.lower()
        nfr_title = next((t for t in NFR_2024 if t.lower() == wanted), None)
        if nfr_title is None:
            nfr_title = next(
                (t for t in NFR_2024 if wanted in t.lower() or t.lower() in wanted),
                None,
            )
        if nfr_title is not None:
            loc_description = NFR_2024[nfr_title]["description"]
            print(f"  Found LOC description for NFR 2024: {nfr_title}")

    # Build NFR section content
    if loc_description:
        nfr_section = f"""## Why It's in the National Film Registry

{loc_description}

*Source: [Library of Congress National Film Registry 2024 announcement](https://newsroom.loc.gov/news/25-films-named-to-national-film-registry-for-preservation/)*"""
    else:
        nfr_section = """## Why It's in the National Film Registry

[Add information about why this film was selected for preservation by the Library of Congress]"""

    # The title sits in a single-quoted YAML scalar, where a literal apostrophe
    # must be written as two apostrophes (e.g. "Koko's Earth Control").
    yaml_title = title.replace("'", "''")

    # Build the frontmatter and content. The quadrupled braces render as the
    # literal "{{< ... >}}" shortcode delimiters.
    content = f'''---
title: '{yaml_title}'
date: {now}
draft: true
series: "Found in the Darkroom"
summary: ""
imdb: "{imdb_id}"
poster: "{poster_url}"
year: {year}
runtime: {runtime}
director: ""
genres: {genres_yaml}
nfr_year: {nfr_year}
letterboxd_url: ""
tags:
  - national-film-registry
  - home-video
---
{{{{< imdbposter >}}}}

| Date watched           |                       |
|------------------------|-----------------------|
| Format                 |                       |
| Watched Multiple Times |                       |
| Added to NFR           | {nfr_year}            |
| Letterboxd Rating      |                       |
| Personal Notes         |                       |

{{{{< /imdbposter >}}}}

{nfr_section}

## My Thoughts

{overview}

'''

    # Create the posts directory on a fresh checkout (mirrors download_poster),
    # and pin the encoding so non-ASCII titles/overviews are written the same
    # way on every platform.
    CONTENT_DIR.mkdir(parents=True, exist_ok=True)
    filepath.write_text(content, encoding="utf-8")
    print(f"  Draft created: {filepath.relative_to(PROJECT_ROOT)}")
    print("\nNext steps:")
    print("  1. Fill in director and other metadata by running:")
    print("     python scripts/fetch_movie_data.py")
    print("  2. Add your viewing details and thoughts")
    if not loc_description:
        print("  3. Research why it was added to the NFR")
    final_step = 3 if loc_description else 4
    print(f"  {final_step}. Add your Letterboxd URL if you've logged it there")

    return filepath


def list_nfr_2024():
    """Print a numbered list of the 2024 NFR inductees with their years."""
    print("\n2024 National Film Registry Inductees:\n")
    for position, film in enumerate(NFR_2024, start=1):
        film_year = NFR_2024[film]["year"]
        print(f"  {position:2}. {film} ({film_year})")
    print()


def main():
    """Command-line entry point.

    Resolves the positional argument to a TMDB movie, either directly from an
    IMDB ID/URL or through a title search that the user confirms, and then
    creates the draft post. Exits with status 1 on missing input, no search
    result, or any error; exits with status 0 after --list-2024 or when the
    user rejects the search result.
    """
    parser = argparse.ArgumentParser(
        description="Create NFR movie posts for 'Found in the Darkroom' series"
    )
    parser.add_argument("input", nargs="?", help="IMDB ID (tt1234567) or movie title")
    parser.add_argument("--list-2024", action="store_true", help="List 2024 NFR inductees")
    parser.add_argument("--nfr-year", type=int, default=2024, help="NFR induction year (default: 2024)")
    args = parser.parse_args()

    if args.list_2024:
        list_nfr_2024()
        sys.exit(0)

    if not args.input:
        print("Error: Please provide an IMDB ID or movie title")
        print("\nUsage:")
        print("  python scripts/new_nfr.py tt1234567")
        print("  python scripts/new_nfr.py 'No Country for Old Men'")
        print("  python scripts/new_nfr.py --list-2024")
        sys.exit(1)

    try:
        # Try to extract IMDB ID
        imdb_id = extract_imdb_id(args.input)

        if imdb_id:
            print(f"Looking up movie by IMDB ID: {imdb_id}")
            tmdb_id = get_tmdb_id_from_imdb(imdb_id)
            tmdb_data = get_tmdb_details(tmdb_id)
        else:
            # Assume it's a title search
            print(f"Searching for: {args.input}")

            # Try to find a year hint in the NFR list. An exact
            # (case-insensitive) match wins; the substring match is only a
            # fallback so short list titles do not shadow the intended entry.
            wanted = args.input.lower()
            nfr_title = next((t for t in NFR_2024 if t.lower() == wanted), None)
            if nfr_title is None:
                nfr_title = next(
                    (t for t in NFR_2024 if wanted in t.lower() or t.lower() in wanted),
                    None,
                )
            year_hint = None
            if nfr_title is not None:
                year_hint = NFR_2024[nfr_title]["year"]
                print(f"Found in NFR 2024 list: {nfr_title} ({year_hint})")

            search_result = search_tmdb_by_title(args.input, year_hint)
            if not search_result and year_hint:
                # The hint can be wrong (loose substring match, or TMDB dating
                # the film differently), so retry unfiltered before giving up.
                search_result = search_tmdb_by_title(args.input)
            if not search_result:
                print(f"Error: No movie found for '{args.input}'")
                sys.exit(1)

            # release_date may be absent or null for obscure entries.
            found_year = (search_result.get("release_date") or "")[:4]
            print(f"Found: {search_result['title']} ({found_year})")
            confirm = input("Is this correct? (Y/n): ").strip().lower()
            if confirm == 'n':
                print("Search cancelled")
                sys.exit(0)

            tmdb_id = search_result["id"]
            tmdb_data = get_tmdb_details(tmdb_id)
            # TMDB sends null when it has no IMDB ID; keep it an empty string
            # so the front matter does not end up containing "None".
            imdb_id = tmdb_data.get("imdb_id") or ""

            if not imdb_id:
                print("Warning: No IMDB ID found for this movie")

        print(f"\nCreating post for: {tmdb_data.get('title')}")
        create_nfr_post(tmdb_data, imdb_id, args.nfr_year)

    except Exception as e:
        # Top-level boundary: report the error with its traceback and exit
        # non-zero. The sys.exit() calls above raise SystemExit, which is not
        # an Exception subclass and so passes through untouched.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()