mako/create-index-entry.awk
2023-09-14 10:39:45 -07:00

123 lines
3.4 KiB
Awk
Executable File

#! /usr/bin/awk -f
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-#
# create-index-entry.awk
#
# Create an index entry from a Gemtext file
#
# This program transforms information from a gemtext post into an index entry.
# The index entry needs to be a valid Gemini hyperlink. The hyperlink notation
# contains the following elements:
#
# * Hyperlink line marker. That is, the line must start with "=> ".
# * URL as the second element.
# * Description as an optional third element.
#
# The program:
#
# * Forms the URL from the filename of the file that it is processing. Thus,
# it will not work correctly with text from standard input.
#
# * Constructs the description from the
# * Published date
# * Title
# * Revised date (if present)
#
# The AWK program expects the Gemtext file to:
#
# 1. Contain the title on the first line, without the heading level prefix.
#
# 2. Record the published date on a line that begins with "Published: " and
# then has an ISO formatted date, e.g., 2023-05-26.
#
# 3. Record revision history on lines that begin with "Revised: " and then
# have an ISO formatted date. These are optional. The program takes the last
# revision date.
#
# The program will write a warning to standard error when there are empty
# values for the title or published date. If the program cannot determine the
# file name, then it exits with error code 1.
#
#
# USAGE
#
# awk -f create-index-entry.awk gemtext-file
#
# AUTHOR
#
# © Andrew Stryker <axs@sdf.org>
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-#
BEGIN {
    # Split fields on any run of colons, commas, and whitespace so that a
    # line such as "Published: 2023-05-26" yields the label in $1 and the
    # value in $2, and a comma-separated tag list yields one tag per field.
    FS = "[:,[:space:]]+"

    # Start every piece of entry metadata as the empty string, which AWK
    # treats as false; the rules below fill these in when they match.
    title = published = revised = ""

    # Seed the tag set with the "---" key so that END always has at least
    # one tag to iterate over, even when the post has no "Tags:" line.
    # NOTE(review): "---" presumably marks the untagged/all-posts entry for
    # the consumer of this output -- confirm against the caller.
    tags["---"] = 1
}
FNR == 1 {
    # The URL of the index entry is taken from FILENAME, so input without
    # a real file name cannot produce a valid link and must fail.
    # FILENAME is "-" when standard input is named explicitly on the
    # command line, and it may be the empty string when awk is run with no
    # file operand at all (e.g. reading from a pipe); reject both.
    assert(FILENAME != "" && FILENAME != "-",
           "Could not determine the filename")

    # The first line holds the title. Remove a leading heading marker
    # ("# ", "## ", ...) if one is present.
    title = gensub(/^#+ */, "", 1, $0)

    # An empty title is only a warning: the entry is still written.
    if (!title) {
        print "Missing title on the first line" > "/dev/stderr"
    }
    next
}
# Publication date: a line starting with "Published: " followed by an ISO
# date (YYYY-MM-DD). With the field separator set in BEGIN the date is the
# second field. If several such lines exist, the last one wins.
/^Published: +[[:digit:]]{4}(-[[:digit:]]{2}){2}/ { published = $2; next }
# Revision date: a line starting with "Revised: " followed by an ISO date
# (YYYY-MM-DD). The value is stored already formatted as the suffix that
# END appends to the description. Each match overwrites the previous one,
# so the last revision line in the file is the one reported.
/^Revised: +[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}/ {
    revised = sprintf(", revised on %s", $2); next
}
# Create a set of tags
#
# A line starting with "Tags:" lists the post's tags. Every field after the
# label becomes a key of the tags array; using an array as a set means a tag
# that is repeated is only recorded once.
/^Tags:/ {
    for (i = 2; i <= NF; ++i) {
        # FS is a regular expression, so a separator at the very end of
        # the line (trailing blanks, a trailing comma, or a bare "Tags:")
        # produces an empty final field. Skip empty fields so that no
        # entry with an empty tag is written in END.
        if ($i != "") {
            tags[$i] = 1
        }
    }
    next
}
END {
    # A failed assertion calls exit inside a rule, which still runs END.
    # Propagate the failure here instead of printing an entry.
    if (_assert_exit)
        exit 1

    # A missing publication date is reported but does not stop the output.
    if (!published)
        printf("Missing published date in %s\n", FILENAME) > "/dev/stderr"

    # Write one line per tag: the tag, a tab, then the Gemini link line
    # built from the file name, published date, title, and the optional
    # revision suffix. The iteration order of "for (... in ...)" is
    # whatever the AWK implementation provides.
    for (tag in tags)
        printf("%s\t=> %s %s -- %s%s\n", tag, FILENAME, published,
               title, revised)
}
# assert --- check that a condition holds; otherwise report it and exit.
#
#   condition  value tested for truth
#   string     description printed in the failure message
#
# On failure the message "FILENAME:FNR: assertion failed: <string>" goes to
# standard error, the global _assert_exit flag is set so that the END rule
# can tell an assertion fired, and the program exits with status 1.
#
# adapted from:
# https://www.gnu.org/software/gawk/manual/gawk.html#Assert-Function
function assert(condition, string) {
    if (condition) {
        return
    }
    printf("%s:%d: assertion failed: %s\n",
           FILENAME, FNR, string) > "/dev/stderr"
    _assert_exit = 1
    exit 1
}
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-#