make-index/src/Recursor.c

297 lines
11 KiB
C
Raw Normal View History

/** Copyright 2008, 2012 Neil Edelman, distributed under the terms of the
GNU General Public License, see copying.txt
2016-09-11 02:40:29 -04:00
2017-03-26 04:47:35 -04:00
{MakeIndex} is a simple content management system that generates static
content, (mostly index.html,) on all the directories rooted at the directory
specified by the argument. It is based on a template file, ".index.html" and
".newsfeed.rss". Also included are files to summarise the directory structure
for a {xml} site map, compatible with Google, and any {.news} for an {rss}
feed. It takes one argument, <directory>, which is the root of the recursion.
There should be an <example> directory that has a bunch of files in it. Run
{bin/MakeIndex example/}; it should make a webpage out of the directory
structure and {.index.html}, open {example/index.html} after running to see.
* If the {.index.html} file exists in the <directory>, prints <index.html>
recursively; overwrites any {index.html} on all the directories rooted at
<directory>;
* if the {.sitemap.xml} file exists in <directory>, prints (and overwrites) an
index called {sitemap.xml};
* if the {.newsfeed.rss} file exists in <directory>, prints (and overwrites)
to {newsfeed.rss} all the {.news} files (if there are any.)
* Treats {.d} as a description of the file without the {.d};
if this is an empty text-file or a zero-byte file, it skips over this file.
* treats {index.d} as a description of the directory;
* treats {content.d} as an in-depth description of the directory,
replacing <index.d> when in the directory;
* treats {.d.jpg} as an image that will go with the description;
* treats {.news} as a newsworthy item; the format of this file is ISO 8601
date (YYYY-MM-DD,) next line title;
* treats {.link} as a link with the href in the file.
{.index.html}, {.sitemap.xml}, {.newsfeed.rss}, see {Parser} for recognised
symbols. Assumes '..' is the parent directory, '.' is the current directory,
and '/' is the directory separator; works for UNIX, MacOS, Windows.
If this is not the case, the constants are in {Files.c}.
@title Recursor
@author Neil
2017-03-26 04:47:35 -04:00
@std C89/90
@version 1.1; 2017-03 fixed pedantic warnings; took out arg
@since 1.0; 2016-09-19 Added umask
0.8; 2013-07 case-insensitive sort
0.7; 2012 sth.dsth.d handled properly
0.6; 2008-03-27
@fixme Don't have <directory> be an argument; just do it in the current.
@fixme Have a subset of LaTeX converted into html for the .d files?
@fixme Encoding is an issue; especially the newsfeed, 7bit.
@fixme It's not robust; eg @(files){@(files){Don't do this.}}. */
2016-09-11 02:40:29 -04:00
#include <stdlib.h> /* malloc free fgets */
#include <stdio.h> /* fprintf FILE */
#include <string.h> /* strcmp */
#include <unistd.h> /* chdir (POSIX, not ANSI) */
#include <sys/stat.h> /* umask */
2016-09-11 02:40:29 -04:00
#include "Files.h"
#include "Widget.h"
#include "Parser.h"
#include "Recursor.h"
/* public */
/* State of the one Recursor. For each of the three outputs (index, sitemap,
 newsfeed) it keeps the template text and the Parser built on that text; the
 sitemap and newsfeed additionally keep their output stream, since they are
 written across the whole run, whereas index.html is opened per directory. */
struct Recursor {
char *indexString; /* index template text, from readFile(); freed in Recursor_() */
struct Parser *indexParser; /* depends on indexString */
FILE *sitemap; /* sitemap output stream, or null if it could not be opened */
char *sitemapString; /* sitemap template text, from readFile() */
struct Parser *sitemapParser; /* depends on sitemapString */
FILE *newsfeed; /* newsfeed output stream, or null if it could not be opened */
char *newsfeedString; /* newsfeed template text, from readFile() */
struct Parser *newsfeedParser; /* depends on newsfeedString */
};
/* private */
/* passed to Files() by recurse(); returns 0 to reject <fn>, non-zero to keep it
 (NOTE(review): how Files() uses the result is defined in Files.c -- confirm) */
int filter(const struct Files *, const char *fn);
/* writes the index and sitemap entry for the current directory, then descends
 into each sub-directory */
int recurse(const struct Files *parent);
/* reads a whole file into a null-terminated buffer the caller must free() */
char *readFile(const char *filename);
/* prints the usage and copyright text to stderr */
void usage(const char *programme);
/* constants */
/* Version printed by usage().
 NOTE(review): the file header documents @version 1.1 while these say 0.8;
 confirm which is current before changing either. */
static const int versionMajor = 0;
static const int versionMinor = 8;
/* readFile() grows its buffer, and reads, in steps of this many bytes */
static const size_t granularity = 1024;
/* NOTE(review): not referenced by any code visible in this file */
static const int maxRead = 0x1000;
/* names of the generated files, relative to the current directory */
const char *htmlIndex = "index.html"; /* in multiple files */
static const char *xmlSitemap = "sitemap.xml";
static const char *rssNewsfeed = "newsfeed.rss";
/* names of the template files the generated files are made from */
static const char *tmplIndex = ".index.html";
static const char *tmplSitemap = ".sitemap.xml";
static const char *tmplNewsfeed = ".newsfeed.rss";
/* in Files.c */
extern const char *dirCurrent;
extern const char *dirParent;
/* in Widget.c */
extern const char *dot_desc;
extern const char *dot_news;
2016-09-11 02:40:29 -04:00
/* there can only be one recursor at a time, sorry; this is it: allocated by
 Recursor(), freed and reset to null by Recursor_() */
static struct Recursor *r = 0;
/* public */
2017-03-26 04:47:35 -04:00
/** Constructor for the one and only Recursor. Reads the three templates,
 builds a Parser on every template that could be read, opens {sitemap.xml} and
 {newsfeed.rss} in the current directory for those that got a Parser, and
 writes their headers. An output file is only opened (and therefore only
 overwritten) when its template produced a Parser, so a missing template
 leaves any existing output untouched.
 @param idx  Filename of the index template.
 @param map  Filename of the sitemap template.
 @param news Filename of the newsfeed template.
 @return The Recursor, or null if an argument is null, a Recursor already
 exists, the allocation failed, or no template yielded a usable Parser. */
struct Recursor *Recursor(const char *idx, const char *map, const char *news) {
	if(!idx || !map || !news) return 0;
	if(r) { fprintf(stderr, "Recursor: there is already a Recursor.\n"); return 0; }
	r = malloc(sizeof *r);
	if(!r) { perror("recursor"); return 0; }
	r->indexString    = 0;
	r->indexParser    = 0;
	r->sitemap        = 0;
	r->sitemapString  = 0;
	r->sitemapParser  = 0;
	r->newsfeed       = 0;
	r->newsfeedString = 0;
	r->newsfeedParser = 0;
	/* read from the input files */
	if(!(r->indexString = readFile(idx))) {
		fprintf(stderr, "Recursor: to make an index, create the file <%s>.\n", idx);
	}
	if(!(r->sitemapString = readFile(map))) {
		fprintf(stderr, "Recursor: to make an sitemap, create the file <%s>.\n", map);
	}
	if(!(r->newsfeedString = readFile(news))) {
		fprintf(stderr, "Recursor: to make a newsfeed, create the file <%s>.\n", news);
	}
	/* create Parsers attached to them */
	if(r->indexString && !(r->indexParser = Parser(r->indexString))) {
		fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", idx);
	}
	if(r->sitemapString && !(r->sitemapParser = Parser(r->sitemapString))) {
		fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", map);
	}
	if(r->newsfeedString && !(r->newsfeedParser = Parser(r->newsfeedString))) {
		fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", news);
	}
	/* open the files for writing (index is opened multiple times in the
	 directories); a Parser without somewhere to write is useless, so it is
	 dropped, keeping "has a Parser" equivalent to "has an open output" */
	if(r->sitemapParser && !(r->sitemap = fopen(xmlSitemap, "w"))) {
		perror(xmlSitemap);
		Parser_(&r->sitemapParser);
		r->sitemapParser = 0;
	}
	if(r->newsfeedParser && !(r->newsfeed = fopen(rssNewsfeed, "w"))) {
		perror(rssNewsfeed);
		Parser_(&r->newsfeedParser);
		r->newsfeedParser = 0;
	}
	/* if there's no content, we have nothing to do */
	if(!r->indexParser && !r->sitemapParser && !r->newsfeedParser) {
		fprintf(stderr, "Recursor: no Parsers defined, it would be useless to continue.\n");
		Recursor_();
		return 0;
	}
	/* parse the "header," ie, everything up to ~, the second arg is null
	 because we haven't set up the Files, so @files{}, @pwd{}, etc are undefined */
	if(r->sitemapParser)  ParserParse(r->sitemapParser, 0, 0, r->sitemap);
	if(r->newsfeedParser) ParserParse(r->newsfeedParser, 0, 0, r->newsfeed);
	return r;
}
void Recursor_(void) {
if(!r) return;
if(r->sitemapParser && r->sitemap) {
ParserParse(r->sitemapParser, 0, -1, r->sitemap);
ParserParse(r->sitemapParser, 0, 0, r->sitemap);
}
if(r->sitemap && fclose(r->sitemap)) perror(xmlSitemap);
if(r->newsfeedParser && r->newsfeed) {
ParserParse(r->newsfeedParser, 0, -1, r->newsfeed);
ParserParse(r->newsfeedParser, 0, 0, r->newsfeed);
}
if(r->newsfeed && fclose(r->newsfeed)) perror(rssNewsfeed);
2017-03-26 04:47:35 -04:00
Parser_(&r->indexParser);
2016-09-11 02:40:29 -04:00
free(r->indexString);
2017-03-26 04:47:35 -04:00
Parser_(&r->sitemapParser);
2016-09-11 02:40:29 -04:00
free(r->sitemapString);
2017-03-26 04:47:35 -04:00
Parser_(&r->newsfeedParser);
2016-09-11 02:40:29 -04:00
free(r->newsfeedString);
free(r);
r = 0;
}
/* entry-point (shouldn't have a prototype) */
int main(int argc, char **argv) {
int ret;
2016-09-11 02:40:29 -04:00
/* set up; fixme: dangerous! use stdarg, have a -delete, -write, and -help */
if(argc <= 1) { usage(argv[0]); return EXIT_SUCCESS; }
fprintf(stderr, "Changing directory to <%s>.\n", argv[1]);
if(chdir(argv[1])) { perror(argv[1]); return EXIT_FAILURE; }
/* make sure that umask is set so that others can read what we create */
2017-03-26 04:47:35 -04:00
umask((mode_t)(S_IWGRP | S_IWOTH));
2016-09-11 02:40:29 -04:00
/* recursing; fixme: this should be configurable */
if(!Recursor(tmplIndex, tmplSitemap, tmplNewsfeed)) return EXIT_FAILURE;
ret = recurse(0);
Recursor_();
return ret ? EXIT_SUCCESS : EXIT_FAILURE;
}
/* private */
int filter(const struct Files *files, const char *fn) {
char *str, filed[64];
FILE *fd;
if(!r) { fprintf(stderr, "Recusor::filter: recursor not initialised.\n"); return 0; }
/* *.d[.0]* */
2017-03-26 04:47:35 -04:00
for(str = (char *)fn; (str = strstr(str, dot_desc)); ) {
str += strlen(dot_desc);
2016-09-11 02:40:29 -04:00
if(*str == '\0' || *str == '.') return 0;
}
/* *.news$ */
2017-03-26 04:47:35 -04:00
if((str = strstr(fn, dot_news))) {
str += strlen(dot_news);
2016-09-11 02:40:29 -04:00
if(*str == '\0') {
if(WidgetSetNews(fn) && ParserParse(r->newsfeedParser, files, 0, r->newsfeed)) {
ParserRewind(r->newsfeedParser);
} else {
fprintf(stderr, "Recursor::filter: error writing news <%s>.\n", fn);
}
return 0;
}
}
/* . */
if(!strcmp(fn, dirCurrent)) return 0;
/* .. */
if(!strcmp(fn, dirParent) && FilesIsRoot(files)) return 0;
/* index.html */
if(!strcmp(fn, htmlIndex)) return 0;
/* add .d, check 1 line for \n (hmm, this must be a real time waster) */
2017-03-26 04:47:35 -04:00
if(strlen(fn) > sizeof(filed) - strlen(dot_desc) - 1) {
2016-09-11 02:40:29 -04:00
fprintf(stderr, "Recusor::filter: regected '%s' because it was too long (%d.)\n", fn, (int)sizeof(filed));
return 0;
}
strcpy(filed, fn);
2017-03-26 04:47:35 -04:00
strcat(filed, dot_desc);
2016-09-11 02:40:29 -04:00
if((fd = fopen(filed, "r"))) {
int ch = fgetc(fd);
if(ch == '\n' || ch == '\r' || ch == EOF) {
fprintf(stderr, "Recursor::filter: '%s' rejected.\n", fn);
return 0;
}
if(fclose(fd)) perror(filed);
}
return -1;
}
/** Generates the content for the current directory and then for every
 sub-directory, depth-first, changing the working directory as it goes.
 {index.html} is only (over)written when there is an index Parser, and the
 sitemap is only extended when there is a sitemap Parser and an open sitemap.
 @param parent The Files of the parent directory, or null at the root.
 @return Always -1 (non-zero); errors are reported on stderr and skipped. */
int recurse(const struct Files *parent) {
	struct Files *f;
	char *name;
	FILE *fp;

	f = Files(parent, &filter);
	/* write the index; without a template there is nothing to write, so don't
	 truncate an index.html that may already be there */
	if(r->indexParser) {
		if((fp = fopen(htmlIndex, "w"))) {
			ParserParse(r->indexParser, f, 0, fp);
			ParserRewind(r->indexParser);
			/* fclose flushes; a failure here means the index is incomplete */
			if(fclose(fp)) perror(htmlIndex);
		} else perror(htmlIndex);
	}
	/* sitemap */
	if(r->sitemapParser && r->sitemap) {
		ParserParse(r->sitemapParser, f, 0, r->sitemap);
		ParserRewind(r->sitemapParser);
	}
	/* recurse */
	while(FilesAdvance(f)) {
		if(!FilesIsDir(f)) continue;
		if(!(name = FilesName(f))) continue;
		if(!strcmp(dirCurrent, name) || !strcmp(dirParent, name)) continue;
		if(chdir(name)) { perror(name); continue; }
		recurse(f);
		/* this happens on Windows; I don't know what to do */
		if(chdir(dirParent)) perror(dirParent);
	}
	Files_(f);
	return -1;
}
/** Reads the whole of a file into a freshly allocated, null-terminated
 buffer, growing it {granularity} bytes at a time, and reports the sizes on
 stderr.
 @param filename The file to read; null is allowed and yields null.
 @return The contents, which you must free(), or null if the file could not
 be opened, memory ran out, or reading failed part-way (in which case the
 partial contents are discarded). */
char *readFile(const char *filename) {
	char *buf = 0, *grown;
	size_t used = 0, capacity = 0, got;
	FILE *fp;

	if(!filename) return 0;
	if(!(fp = fopen(filename, "r"))) { perror(filename); return 0; }
	for( ; ; ) {
		grown = realloc(buf, capacity + granularity);
		if(!grown) { perror(filename); goto fail; }
		buf = grown;
		capacity += granularity;
		got = fread(buf + used, sizeof(char), granularity, fp);
		used += got;
		/* a short read is the end of the file or an error; either way
		 used < capacity now, so there is room for the terminator */
		if(got < granularity) break;
	}
	/* don't pass a truncated template off as the real thing */
	if(ferror(fp)) { perror(filename); goto fail; }
	buf[used] = '\0';
	/* C89 has no conversion for size_t, so cast to what %lu expects */
	fprintf(stderr, "Opened '%s' and alloted %lu bytes to read %lu "
		"characters.\n", filename, (unsigned long)capacity, (unsigned long)used);
	if(fclose(fp)) perror(filename);
	return buf; /** you must free() the memory! */
fail:
	free(buf);
	fclose(fp);
	return 0;
}
/** Prints how to invoke the programme, its version, a short description and
 the copyright notice, all to stderr.
 @param programme The name to show in the usage line. */
void usage(const char *programme) {
	FILE *const out = stderr;

	/* the lines that need values filled in */
	fprintf(out, "Usage: %s <directory>\n\n", programme);
	fprintf(out, "Version %d.%d.\n\n", versionMajor, versionMinor);
	/* description */
	fputs("MakeIndex is a content generator that places a changing\n", out);
	fputs("index.html on all the directories under <directory>\n", out);
	fprintf(out, "based on a template file in <directory> called <%s>.\n", tmplIndex);
	fputs("It also does some other stuff.\n\n", out);
	fputs("See readme.txt or http://neil.chaosnet.org/ for further info.\n\n", out);
	/* licence */
	fputs("MakeIndex Copyright 2008, 2012 Neil Edelman\n", out);
	fputs("This program comes with ABSOLUTELY NO WARRANTY.\n", out);
	fputs("This is free software, and you are welcome to redistribute it\n", out);
	fputs("under certain conditions; see gpl.txt.\n\n", out);
}