make-index/src/Recursor.c

396 lines
13 KiB
C

/** Copyright 2008, 2012 Neil Edelman, distributed under the terms of the
GNU General Public License, see copying.txt
{Recursor} is the main part of {MakeIndex}, a content management system that
generates static content, (mostly index.html,) on all the directories rooted
at the directory specified by the argument. It is based on a template file,
".index.html" and ".newsfeed.rss". Also included are files to summarise the
directory structure for a {xml} site map, compatible with Google, and any
{.news} for an {rss} feed.
There should be an <example> directory that has a bunch of files in it. Run
{bin/MakeIndex example/}; it should make a webpage out of the directory
structure and {.index.html}, open {example/index.html} after running to see.
* If the {.index.html} file exists in the <directory>, prints <index.html>
recursively; overwrites any {index.html} on all the directories rooted at
<directory>;
* if the {.sitemap.xml} file exists in <directory>, prints (and overwrites) an
index called {sitemap.xml};
* if the {.newsfeed.rss} file exists in <directory>, prints (and overwrites)
to {newsfeed.rss} all the {.news} files (if there are any.)
* Treats {.d} as a description of the file without the {.d};
if this is an empty text-file or a zero-byte file, it skips over this file.
* treats {index.d} as a description of the directory;
* treats {content.d} as an in-depth description of the directory,
replacing <index.d> when in the directory;
* treats {.d.jpg} as an image that will go with the description;
* treats {.news} as a newsworthy item; the format of this file is ISO 8601
date (YYYY-MM-DD,) next line title;
* treats {.link} as a link with the href in the file.
{.index.html}, {.sitemap.xml}, {.newsfeed.rss}, see {Parser} for recognised
symbols. Assumes '..' is the parent directory, '.' is the current directory,
and '/' is the directory separator; works for UNIX, MacOS, Windows.
If this is not the case, the constants are in {Files.c}.
@title Recursor
@author Neil
@std POSIX.1
@version 1.1; 2017-03 fixed pedantic warnings; command-line improvements
@since 1.0; 2016-09-19 Added umask
0.8; 2013-07 case-insensitive sort
0.7; 2012 sth.dsth.d handled properly
0.6; 2008-03-27
@fixme Don't have <directory> be an argument; just do it in the current.
@fixme Have a subset of LaTeX converted into html for the .d files?
@fixme Encoding is an issue; especially the newsfeed, which requires 7-bit.
@fixme It's not robust; eg @(files){@(files){Don't do this.}}. */
#include <stdlib.h> /* malloc free fgets */
#include <stdio.h> /* fprintf FILE */
#include <string.h> /* strcmp */
#include <unistd.h> /* chdir (POSIX, not ANSI) */
#include <sys/types.h> /* mode_t (umask) */
#include <sys/stat.h> /* umask */
#include "Files.h"
#include "Widget.h"
#include "Parser.h"
#include "Recursor.h"
/* constants */
/* programme name and version, as printed by usage() */
static const char *const programme = "MakeIndex";
static const int version_major = 1;
static const int version_minor = 1;
/* number of bytes by which readFile() enlarges its buffer on every pass */
static const size_t granularity = 1024;
/* NOTE(review): no use of this constant is visible in this file; confirm
 before removing */
static const int max_read = 0x1000;
/* names of the generated files; these are plain (re-assignable) pointers
 because main() may point them at command-line arguments */
static const char *html_index = "index.html";
static const char *xml_sitemap = "sitemap.xml";
static const char *rss_newsfeed = "newsfeed.rss";
/* names of the templates; likewise replaceable from the command line */
static const char *template_index = ".index.html";
static const char *template_sitemap = ".sitemap.xml";
static const char *template_newsfeed = ".newsfeed.rss";
/* in Files.c */
/* compared against entry names in filter() and recurse(); dir_parent is also
 the argument to chdir() when coming back up */
extern const char *dir_current;
extern const char *dir_parent;
/* in Widget.c */
/* file-name markers that filter() searches for */
extern const char *dot_desc;
extern const char *dot_news;
/* public */
/** Everything one run needs: for each of the index, the sitemap and the
 newsfeed, the text of its template and the Parser built on that text; and,
 for the sitemap and the newsfeed, the output stream that stays open for the
 whole run. (The index has no stream here; recurse() opens one per
 directory.) Any member may be null when its template or output file was
 unavailable. */
struct Recursor {
char *indexString; /* template text returned by readFile(); freed in Recursor_() */
struct Parser *indexParser; /* depends on indexString */
FILE *sitemap; /* output stream, opened for writing on xml_sitemap */
char *sitemapString; /* template text returned by readFile(); freed in Recursor_() */
struct Parser *sitemapParser; /* depends on sitemapString */
FILE *newsfeed; /* output stream, opened for writing on rss_newsfeed */
char *newsfeedString; /* template text returned by readFile(); freed in Recursor_() */
struct Parser *newsfeedParser; /* depends on newsfeedString */
};
/* private */
/* filter: callback handed to Files() by recurse(); returns zero to reject an
 entry */
static int filter(struct Files *const files, const char *fn);
/* recurse: writes the outputs for the working directory, then descends */
static int recurse(struct Files *const parent);
/* readFile: returns the contents of a file in memory that the caller frees */
static char *readFile(const char *filename);
/* usage: prints the command-line help on stderr */
static void usage(void);
/* there can only be one recursor at a time, sorry */
/* set by Recursor(), cleared by Recursor_(); filter() and recurse() reach
 the parsers and output streams through it */
static struct Recursor *r = 0;
/* public */
/** Constructor; sets up the single, file-wide Recursor. The three templates
 are read into memory and a Parser is attached to each. The sitemap and
 newsfeed outputs are only created (truncating whatever was there) once their
 template has actually been read, so a missing template leaves an existing
 output file alone. The index output is not opened here; it is opened once per
 directory during the recursion.
 @param idx: file name of the prototype index file, eg, ".index.html".
 @param map: file name of the prototype map file, eg, ".sitemap.xml".
 @param news: file name of the prototype news file, eg, ".newsfeed.rss".
 @return The Recursor, or null if an argument is null, a Recursor already
 exists, memory could not be allocated, or not one of the three templates
 produced a Parser. */
struct Recursor *Recursor(const char *idx, const char *map, const char *news) {
	if(!idx || !map || !news) return 0;
	if(r) {
		fprintf(stderr, "Recursor: there is already a Recursor.\n");
		return 0;
	}
	if(!(r = malloc(sizeof *r))) { perror("recursor"); return 0; }
	r->indexString    = 0;
	r->indexParser    = 0;
	r->sitemap        = 0;
	r->sitemapString  = 0;
	r->sitemapParser  = 0;
	r->newsfeed       = 0;
	r->newsfeedString = 0;
	r->newsfeedParser = 0;

	/* index: only the template is loaded; the output is opened multiple
	 times in the directories */
	if(!(r->indexString = readFile(idx))) {
		fprintf(stderr, "Recursor: to make an index, create the file <%s>.\n",
			idx);
	} else if(!(r->indexParser = Parser(r->indexString))) {
		fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", idx);
	}

	/* sitemap: template first, then the output, then the Parser; a Parser
	 therefore never exists without a stream to write to */
	if(!(r->sitemapString = readFile(map))) {
		fprintf(stderr, "Recursor: to make an sitemap, create the file <%s>.\n",
			map);
	} else if(!(r->sitemap = fopen(xml_sitemap, "w"))) {
		perror(xml_sitemap);
	} else if(!(r->sitemapParser = Parser(r->sitemapString))) {
		fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", map);
	}

	/* newsfeed: same order as the sitemap */
	if(!(r->newsfeedString = readFile(news))) {
		fprintf(stderr, "Recursor: to make a newsfeed, create the file <%s>.\n",
			news);
	} else if(!(r->newsfeed = fopen(rss_newsfeed, "w"))) {
		perror(rss_newsfeed);
	} else if(!(r->newsfeedParser = Parser(r->newsfeedString))) {
		fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", news);
	}

	/* if there's no content, we have nothing to do */
	if(!r->indexParser && !r->sitemapParser && !r->newsfeedParser) {
		fprintf(stderr, "Recursor: no Parsers defined, it would be useless to "
			"continue.\n");
		Recursor_();
		return 0;
	}

	/* parse the "header," ie, everything up to ~, the second arg is null
	 because we haven't set up the Files, so @files{}, @pwd{}, etc are
	 undefined */
	if(r->sitemapParser)  ParserParse(r->sitemapParser, 0, 0, r->sitemap);
	if(r->newsfeedParser) ParserParse(r->newsfeedParser, 0, 0, r->newsfeed);

	return r;
}
/** Destructor. Finishes and closes the sitemap and the newsfeed, then
 releases the three Parsers, the three template strings and the Recursor
 itself, leaving the file-wide pointer null. Harmless when there is no
 Recursor. */
void Recursor_(void) {
	if(!r) return;

	/* sitemap: two final passes of the Parser (third argument -1, then 0)
	 before the stream is closed; presumably this skips to and prints the
	 tail of the template -- see Parser */
	if(r->sitemap) {
		if(r->sitemapParser) {
			ParserParse(r->sitemapParser, 0, -1, r->sitemap);
			ParserParse(r->sitemapParser, 0, 0, r->sitemap);
		}
		if(fclose(r->sitemap) != 0) perror(xml_sitemap);
	}

	/* newsfeed: the same treatment */
	if(r->newsfeed) {
		if(r->newsfeedParser) {
			ParserParse(r->newsfeedParser, 0, -1, r->newsfeed);
			ParserParse(r->newsfeedParser, 0, 0, r->newsfeed);
		}
		if(fclose(r->newsfeed) != 0) perror(rss_newsfeed);
	}

	/* each Parser goes before the string it depends on */
	Parser_(&r->indexParser);
	free(r->indexString);

	Parser_(&r->sitemapParser);
	free(r->sitemapString);

	Parser_(&r->newsfeedParser);
	free(r->newsfeedString);

	free(r);
	r = 0;
}
/** Actually does the recursion, starting with no parent listing.
 @return Zero if no Recursor has been constructed; otherwise the value of the
 recursion. */
int RecursorGo(void) {
	return r ? recurse(0) : 0;
}
/* private */
/** Decides whether a directory entry is to be listed. As a side effect, every
 entry whose name ends in {dot_news} is handed to the newsfeed Parser (and
 then left out of the listing.)
 @param files: the listing being filtered; passed on to the newsfeed Parser
 and asked whether it is the root.
 @param fn: the name of the entry.
 @return Zero to leave the entry out, non-zero (-1) to keep it.
 @implements FilesFilter */
static int filter(struct Files *const files, const char *fn) {
	const char *str;
	char filed[64];
	size_t fn_len, desc_len, news_len;
	FILE *fd;

	if(!r) {
		fprintf(stderr, "Recusor::filter: recursor not initialised.\n");
		return 0;
	}
	fn_len   = strlen(fn);
	desc_len = strlen(dot_desc);
	news_len = strlen(dot_news);

	/* *.d[.0]* -- every occurrence is looked at, not just the first */
	for(str = fn; (str = strstr(str, dot_desc)); ) {
		str += desc_len;
		if(*str == '\0' || *str == '.') return 0;
	}

	/* *.news$ -- compare the tail of the name, so that a name with an
	 earlier ".news" in it, (eg, "a.news.news",) is still recognised */
	if(fn_len >= news_len && !strcmp(fn + fn_len - news_len, dot_news)) {
		if(WidgetSetNews(fn)
			&& ParserParse(r->newsfeedParser, files, 0, r->newsfeed)) {
			ParserRewind(r->newsfeedParser);
		} else {
			fprintf(stderr, "Recursor::filter: error writing news <%s>.\n", fn);
		}
		return 0;
	}

	/* . */
	if(!strcmp(fn, dir_current)) return 0;
	/* .. */
	if(!strcmp(fn, dir_parent) && FilesIsRoot(files)) return 0;
	/* index.html */
	if(!strcmp(fn, html_index)) return 0;

	/* add .d, check 1 line for \n (hmm, this must be a real time waster);
	 the sum is compared so that nothing can wrap around */
	if(fn_len + desc_len + 1 > sizeof filed) {
		fprintf(stderr, "Recusor::filter: regected '%s' because it was too long (%d.)\n", fn, (int)sizeof(filed));
		return 0;
	}
	strcpy(filed, fn);
	strcat(filed, dot_desc);

	/* no description file at all is fine; one that is empty or starts with a
	 blank line means "skip this entry" */
	if((fd = fopen(filed, "r"))) {
		const int ch = fgetc(fd);
		/* close before deciding, so the stream is released on both paths */
		if(fclose(fd)) perror(filed);
		if(ch == '\n' || ch == '\r' || ch == EOF) {
			fprintf(stderr, "Recursor::filter: '%s' rejected.\n", fn);
			return 0;
		}
	}
	return -1;
}
/** Writes the index for the working directory, adds the directory to the
 sitemap, and then does the same in every sub-directory, changing the working
 directory on the way down and back up again.
 @param parent: the listing of the directory above, or null at the top.
 @return Always true (-1.) */
static int recurse(struct Files *const parent) {
	/* NOTE(review): the result of Files() is not checked here; confirm that
	 the Files and Parser functions accept a null listing */
	struct Files *const f = Files(parent, &filter);
	const char *name;
	FILE *fp;

	/* write the index; without an index template there is nothing to write,
	 so any index.html already in the directory is left alone rather than
	 truncated */
	if(r->indexParser) {
		if((fp = fopen(html_index, "w"))) {
			ParserParse(r->indexParser, f, 0, fp);
			ParserRewind(r->indexParser);
			/* a failed close can mean the index never reached the disc */
			if(fclose(fp)) perror(html_index);
		} else {
			perror(html_index);
		}
	}

	/* sitemap */
	if(r->sitemapParser && r->sitemap) {
		ParserParse(r->sitemapParser, f, 0, r->sitemap);
		ParserRewind(r->sitemapParser);
	}

	/* recurse */
	while(FilesAdvance(f)) {
		if(!FilesIsDir(f)
			|| !(name = FilesName(f))
			|| !strcmp(dir_current, name)
			|| !strcmp(dir_parent, name)) continue;
		if(chdir(name)) { perror(name); continue; }
		recurse(f);
		/* this happens on Windows; I don't know what to do */
		if(chdir(dir_parent)) perror(dir_parent);
	}

	Files_(f);
	return -1;
}
/** Reads an entire file into memory as a string.
 @param filename: the file to read; null is accepted and gives null.
 @return A null-terminated buffer which the caller must {free}, or null if
 the file could not be opened or read, or memory ran out; in those cases the
 reason is printed on {stderr} and nothing is left open or allocated. */
static char *readFile(const char *filename) {
	char *buf = 0, *bigger;
	size_t size = 0, used = 0, got;
	FILE *fp;

	if(!filename) return 0;
	if(!(fp = fopen(filename, "r"))) { perror(filename); return 0; }

	/* grow by one chunk, fill it, and go around again if it filled up
	 completely; the final, short read therefore always leaves room for the
	 terminator */
	do {
		if(!(bigger = realloc(buf, (size + granularity) * sizeof *buf)))
			goto fail;
		buf   = bigger;
		size += granularity;
		got   = fread(buf + used, sizeof *buf, granularity, fp);
		used += got;
	} while(got == granularity);
	/* a short read is either the end of the file or an error */
	if(ferror(fp)) goto fail;
	buf[used] = '\0';

	/* size_t has no conversion specifier in C89, hence the casts */
	fprintf(stderr, "Opened '%s' and alloted %lu bytes to read %lu "
		"characters.\n", filename, (unsigned long)size, (unsigned long)used);
	if(fclose(fp)) perror(filename);
	return buf; /* you must free() the memory! */

fail:
	perror(filename); /* before anything else can disturb errno */
	free(buf);
	fclose(fp);
	return 0;
}
/** Prints the command-line synopsis, the version and the licence notice on
 {stderr}. The short options shown are the ones the argument parser in
 {main} compares against, (-ii, -in, -is, -oi, -on, -os.) */
static void usage(void) {
	fprintf(stderr, "Usage: %s [--directory|-d] <directory> | [\n"
		"\t [--input-index |-ii] <.index.html>\n"
		"\t| [--input-newsfeed|-in] <.newsfeed.rss>\n"
		"\t| [--input-sitemap |-is] <.sitemap.xml>\n"
		"\t| [--output-index |-oi] <index.html>\n"
		"\t| [--output-newsfeed|-on] <newsfeed.rss>\n"
		"\t| [--output-sitemap |-os] <sitemap.xml>\n"
		"] | [--help|-h]\n\n", programme);
	fprintf(stderr, "All the defaults are listed, except --directory, which "
		"requires a value.\n"
		"Input are in <directory>.\n\n"
		"Version %d.%d.\n\n"
		"MakeIndex is a content management system that generates static\n"
		"content, (mostly index.html,) on all the directories rooted at the\n"
		"<directory> based on <%s>.\n\n"
		"It also does some other stuff. See readme.txt or\n"
		"http://neil.chaosnet.org/ for further info.\n\n",
		version_major, version_minor, template_index);
	fprintf(stderr, "MakeIndex Copyright 2008, 2012 Neil Edelman\n"
		"This program comes with ABSOLUTELY NO WARRANTY.\n"
		"This is free software, and you are welcome to redistribute it\n"
		"under certain conditions; see gpl.txt.\n\n");
}
/** Entry-point (shouldn't have a prototype.) Reads the command line, changes
 to the requested directory, relaxes the umask so that others can read what
 is created, and runs a Recursor over the directory.

 The directory may be given after {--directory}/{-d} or on its own. Any
 argument that is not understood, an option without its value, or a second
 directory makes the whole command line invalid; nothing is generated then.
 @param argc: number of arguments.
 @param argv: the arguments.
 @return EXIT_SUCCESS if the recursion ran, or if help was asked for on an
 otherwise valid command line; EXIT_FAILURE in every other case. */
int main(int argc, char **argv) {
	/* options that take one value, with the file name each one replaces;
	 two short spellings are accepted for each so that existing invocations
	 keep working */
	const struct {
		const char *name, *brief, *alias;
		const char **value;
	} settings[] = {
		{ "--input-index",     "-ii", "-ti", &template_index },
		{ "--input-newsfeed",  "-in", "-tn", &template_newsfeed },
		{ "--input-sitemap",   "-is", "-ts", &template_sitemap },
		{ "--output-index",    "-oi", "-i",  &html_index },
		{ "--output-newsfeed", "-on", "-n",  &rss_newsfeed },
		{ "--output-sitemap",  "-os", "-s",  &xml_sitemap }
	};
	const size_t settings_size = sizeof settings / sizeof *settings;
	int ac, ret;
	int is_help = 0, is_invalid = 0;
	const char *directory = 0;

	/* gperf is wonderful, but what about other build systems? */
	for(ac = 1; ac < argc && !is_invalid; ac++) {
		const char *const av = argv[ac];
		size_t i;

		if(!strcmp("--help", av) || !strcmp("-h", av)) {
			is_help = -1;
			continue;
		}
		if(!strcmp("--directory", av) || !strcmp("-d", av)) {
			/* only one directory, and it must actually be there */
			if(directory || ++ac >= argc) is_invalid = -1;
			else directory = argv[ac];
			continue;
		}
		for(i = 0; i < settings_size; i++) {
			if(!strcmp(settings[i].name, av)
				|| !strcmp(settings[i].brief, av)
				|| !strcmp(settings[i].alias, av)) break;
		}
		if(i < settings_size) {
			if(++ac < argc) *settings[i].value = argv[ac];
			else is_invalid = -1;
		} else if(av[0] != '-' && !directory) {
			/* a bare argument is the directory */
			directory = av;
		} else {
			fprintf(stderr, "%s: argument <%s> not understood.\n",
				argv[0], av);
			is_invalid = -1;
		}
	}
	if(is_help || is_invalid || !directory) {
		usage();
		return is_help && !is_invalid ? EXIT_SUCCESS : EXIT_FAILURE;
	}

	fprintf(stderr, "Changing directory to <%s>.\n", directory);
	if(chdir(directory)) { perror(directory); return EXIT_FAILURE; }
	/* make sure that umask is set so that others can read what we create
	 "warning: passing argument 1 of 'umask' with different width due to
	 prototype" <- umask's fault; can't change? */
	umask(S_IWGRP | S_IWOTH);

	/* recursing; fixme: this should be configurable */
	if(!Recursor(template_index, template_sitemap, template_newsfeed))
		return EXIT_FAILURE;
	ret = RecursorGo();
	Recursor_();
	return ret ? EXIT_SUCCESS : EXIT_FAILURE;
}