/** Copyright 2008, 2012 Neil Edelman, distributed under the terms of the GNU General Public License, see copying.txt {Recursor} is the main part of {MakeIndex}, a content management system that generates static content, (mostly index.html,) on all the directories rooted at the directory specified by {--directory} or {-d}. It is based on a template file, {.index.html} and {.newsfeed.rss}, which are changeable. Also included are files to summarise the directory structure for a {xml} site map, compatible with Google, and any {.news} for an {rss} feed. There should be an directory that has a bunch of files in it. Run {bin/MakeIndex -d example/}; it should make a webpage out of the directory structure and {.index.html}, open {example/index.html} after running to see. * If the {.index.html} file exists in the , prints recursively; overwrites any {index.html} on all the directories rooted at ; * if the {.sitemap.xml} file exists in , prints (and overwrites) an index called {sitemap.xml}; * if the {.newsfeed.rss} file exists in , prints (and overwrites) to {newsfeed.rss} all the {.news} files (if there are any.) * Treats {.d} as a description of the file without the {.d}; if this is an empty text-file or a zero-byte file, it skips over this file. * treats {index.d} as a description of the directory; * treats {content.d} as an in-depth description of the directory, replacing when in the directory; * treats {.d.jpg} as a image that will go with the description; * treats {.news} as a newsworthy item; the format of this file is ISO 8601 date (YYYY-MM-DD,) next line title; * treats {.link} as a link with the href in the file. {.index.html}, {.sitemap.xml}, {.newsfeed.rss}, see {Parser} for recognised symbols. Assumes '..' is the parent directory, '.' is the current directory, and '/' is the directory separator; works for UNIX, MacOS, and Windows. If this is not the case, the constants are in {Files.c}. @title Recursor @author Neil @std POSIX.1 @version 2018-03 Removed version numbers. @since 1.1; 2017-03 Fixed pedantic warnings; command-line improvements. 1.0; 2016-09-19 Added umask. 0.8; 2013-07 Case-insensitive sort. 0.7; 2012 {sth.dsth.d} handled properly. 0.6; 2008-03-27 Made something out of an earlier work. @fixme Don't have be an argument; just do it in the current. @fixme Borrow {Cdoc} parser for the {.d} files. @fixme Encoding is an issue; especially the newsfeed, which requires 7-bit. @fixme It's not robust; eg @(files){@(files){Don't do this.}}. @fixme Simplify the command line arguments. */ #include /* malloc free fgets */ #include /* fprintf FILE */ #include /* strcmp */ #include /* chdir (POSIX, not ANSI) */ #include /* mode_t (umask) */ #include /* umask */ #include "Files.h" #include "Widget.h" #include "Parser.h" #include "Recursor.h" /* constants */ static const char *const programme = "MakeIndex"; static const int version_major = 1; static const int version_minor = 1; static const size_t granularity = 1024; static const char *html_index = "index.html"; static const char *xml_sitemap = "sitemap.xml"; static const char *rss_newsfeed = "newsfeed.rss"; static const char *template_index = ".index.html"; static const char *template_sitemap = ".sitemap.xml"; static const char *template_newsfeed = ".newsfeed.rss"; /* in Files.c */ extern const char *dir_current; extern const char *dir_parent; /* in Widget.c */ extern const char *dot_desc; extern const char *dot_news; /* public */ struct Recursor { char *indexString; struct Parser *indexParser; /* depends on indexString */ FILE *sitemap; char *sitemapString; struct Parser *sitemapParser; /* depends on sitemapString */ FILE *newsfeed; char *newsfeedString; struct Parser *newsfeedParser; /* depends on newsfeedString */ }; /* private */ static int filter(struct Files *const files, const char *fn); static int recurse(struct Files *const parent); static char *readFile(const char *filename); static void usage(void); /* there can only be one recursor at a time, sorry */ static struct Recursor *r = 0; /* public */ /** @param idx: file name of the prototype index file, eg, ".index.html". @param map: file name of the prototype map file, eg, ".sitmap.xml". @param news: file name of the prototype news file, eg, ".newsfeed.rss". */ struct Recursor *Recursor(const char *idx, const char *map, const char *news) { if(!idx || !idx || !map || !news) return 0; if(r) { fprintf(stderr, "Recursor: there is already a Recursor.\n"); return 0; } r = malloc(sizeof(struct Recursor)); if(!r) { perror("recursor"); Recursor_(); return 0; } r->indexString = 0; r->indexParser = 0; r->sitemap = 0; r->sitemapString = 0; r->sitemapParser = 0; r->newsfeed = 0; r->newsfeedString = 0; r->newsfeedParser = 0; /* open the files for writing (index is opened multiple times in the directories) */ if(!(r->sitemap = fopen(xml_sitemap, "w"))) perror(xml_sitemap); if(!(r->newsfeed = fopen(rss_newsfeed, "w"))) perror(rss_newsfeed); /* read from the input files */ if( !(r->indexString = readFile(idx))) { fprintf(stderr, "Recursor: to make an index, create the file <%s>.\n", idx); } if(r->sitemap && !(r->sitemapString = readFile(map))) { fprintf(stderr, "Recursor: to make an sitemap, create the file <%s>.\n", map); } if(r->newsfeed && !(r->newsfeedString = readFile(news))) { fprintf(stderr, "Recursor: to make a newsfeed, create the file <%s>.\n", news); } /* create Parsers attached to them */ if(r->indexString && !(r->indexParser = Parser(r->indexString))) { fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", idx); } if(r->sitemapString && !(r->sitemapParser = Parser(r->sitemapString))) { fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", map); } if(r->newsfeedString && !(r->newsfeedParser = Parser(r->newsfeedString))) { fprintf(stderr, "Recursor: error generating Parser from <%s>.\n", news); } /* if theirs no content, we have nothing to do */ if(!r->indexParser && !r->sitemapParser && !r->newsfeedParser) { fprintf(stderr, "Recursor: no Parsers defined, it would be useless to " "continue.\n"); Recursor_(); return 0; } /* parse the "header," ie, everything up to ~, the second arg is null because we haven't set up the Files, so @files{}, @pwd{}, etc are undefined */ ParserParse(r->sitemapParser, 0, 0, r->sitemap); ParserParse(r->newsfeedParser, 0, 0, r->newsfeed); return r; } /** Destructor. */ void Recursor_(void) { if(!r) return; if(r->sitemapParser && r->sitemap) { ParserParse(r->sitemapParser, 0, -1, r->sitemap); ParserParse(r->sitemapParser, 0, 0, r->sitemap); } if(r->sitemap && fclose(r->sitemap)) perror(xml_sitemap); if(r->newsfeedParser && r->newsfeed) { ParserParse(r->newsfeedParser, 0, -1, r->newsfeed); ParserParse(r->newsfeedParser, 0, 0, r->newsfeed); } if(r->newsfeed && fclose(r->newsfeed)) perror(rss_newsfeed); Parser_(&r->indexParser); free(r->indexString); Parser_(&r->sitemapParser); free(r->sitemapString); Parser_(&r->newsfeedParser); free(r->newsfeedString); free(r); r = 0; } /** Actually does the recursion. */ int RecursorGo(void) { if(!r) return 0; return recurse(0); } /* private */ /** @implements FilesFilter */ static int filter(struct Files *const files, const char *fn) { const char *str; char filed[64]; FILE *fd; if(!r) { fprintf(stderr, "Recusor::filter: recursor not initialised.\n"); return 0; } /* *.d[.0]* */ for(str = fn; (str = strstr(str, dot_desc)); ) { str += strlen(dot_desc); if(*str == '\0' || *str == '.') return 0; } /* *.news$ */ if((str = strstr(fn, dot_news))) { str += strlen(dot_news); if(*str == '\0') { if(WidgetSetNews(fn) && ParserParse(r->newsfeedParser, files, 0, r->newsfeed)) { ParserRewind(r->newsfeedParser); } else { fprintf(stderr, "Recursor::filter: error writing news <%s>.\n", fn); } return 0; } } /* . */ if(!strcmp(fn, dir_current)) return 0; /* .. */ if(!strcmp(fn, dir_parent) && FilesIsRoot(files)) return 0; /* index.html */ if(!strcmp(fn, html_index)) return 0; /* add .d, check 1 line for \n (hmm, this must be a real time waster) */ if(strlen(fn) > sizeof(filed) - strlen(dot_desc) - 1) { fprintf(stderr, "Recusor::filter: regected '%s' because it was too long (%d.)\n", fn, (int)sizeof(filed)); return 0; } strcpy(filed, fn); strcat(filed, dot_desc); if((fd = fopen(filed, "r"))) { int ch = fgetc(fd); if(ch == '\n' || ch == '\r' || ch == EOF) { fprintf(stderr, "Recursor::filter: '%s' rejected.\n", fn); return 0; } if(fclose(fd)) perror(filed); } return -1; } static int recurse(struct Files *const parent) { struct Files *f; const char *name; FILE *fp; f = Files(parent, &filter); /* write the index */ if((fp = fopen(html_index, "w"))) { ParserParse(r->indexParser, f, 0, fp); ParserRewind(r->indexParser); fclose(fp); } else perror(html_index); /* sitemap */ ParserParse(r->sitemapParser, f, 0, r->sitemap); ParserRewind(r->sitemapParser); /* recurse */ while(FilesAdvance(f)) { if(!FilesIsDir(f) || !(name = FilesName(f)) || !strcmp(dir_current, name) || !strcmp(dir_parent, name) || !(name = FilesName(f))) continue; if(chdir(name)) { perror(name); continue; } recurse(f); /* this happens on Windows; I don't know what to do */ if(chdir(dir_parent)) perror(dir_parent); } Files_(f); return -1; } static char *readFile(const char *filename) { char *buf = 0, *newBuf; size_t bufPos = 0, bufSize = 0, rd; FILE *fp; if(!filename) return 0; if(!(fp = fopen(filename, "r"))) { perror(filename); return 0; } for( ; ; ) { newBuf = realloc(buf, (bufSize += granularity) * sizeof(char)); if(!newBuf) { perror(filename); free(buf); return 0; } buf = newBuf; rd = fread(buf + bufPos, sizeof(char), granularity, fp); bufPos += rd; if(rd < granularity) { buf[bufPos] = '\0'; break; } } fprintf(stderr, "Opened '%s' and alloted %lu bytes to read %lu " "characters.\n", filename, bufSize, bufPos); if(fclose(fp)) perror(filename); return buf; /* you must free() the memory! */ } static void usage(void) { fprintf(stderr, "Usage: %s [--directory|-d] | [\n" "\t [--input-index |-ti] <.index.html>\n" "\t| [--input-newsfeed|-tn] <.newsfeed.rss>\n" "\t| [--input-sitemap |-ts] <.sitemap.xml>\n" "\t| [--output-index |-i] \n" "\t| [--output-newsfeed|-n] \n" "\t| [--output-sitemap |-s] \n" "] | [--help|-h]\n\n", programme); fprintf(stderr, "All the defaults are listed, except --directory, which " "requires a value.\n" "Input are in .\n\n" "Version %d.%d.\n\n" "MakeIndex is a content management system that generates static\n" "content, (mostly index.html,) on all the directories rooted at the\n" " based on <%s>.\n\n" "It also does some other stuff. See readme.txt or\n" "http://neil.chaosnet.org/ for further info.\n\n", version_major, version_minor, template_index); fprintf(stderr, "MakeIndex Copyright 2008, 2012 Neil Edelman\n" "This program comes with ABSOLUTELY NO WARRANTY.\n" "This is free software, and you are welcome to redistribute it\n" "under certain conditions; see gpl.txt.\n\n"); } /* Entry-point (shouldn't have a prototype.) */ int main(int argc, char **argv) { int ac, ret; int is_help = 0, is_invalid = 0; const char *directory = 0; /* gperf is wonderful, but what about other build systems? */ for(ac = 1; ac < argc; ac++) { const char *const av = argv[ac]; if(!strcmp("--help", av) || !strcmp("-h", av)) { is_help = -1; } else if(!strcmp("--directory", av) || !strcmp("-d", av)) { if(directory) { is_invalid = -1; break; } else if(++ac < argc) { directory = argv[ac]; } else { is_invalid = -1; break; } } else if(!strcmp("--input-index", av) || !strcmp("-ii", av)) { if(++ac < argc) { template_index = argv[ac]; } else { is_invalid = -1; break; } } else if(!strcmp("--input-newsfeed", av) || !strcmp("-in", av)) { if(++ac < argc) { template_newsfeed = argv[ac]; } else { is_invalid = -1; break; } } else if(!strcmp("--input-sitemap", av) || !strcmp("-is", av)) { if(++ac < argc) { template_sitemap = argv[ac]; } else { is_invalid = -1; break; } } else if(!strcmp("--output-index", av) || !strcmp("-oi", av)) { if(++ac < argc) { html_index = argv[ac]; } else { is_invalid = -1; break; } } else if(!strcmp("--output-newsfeed", av) || !strcmp("-on", av)) { if(++ac < argc) { rss_newsfeed = argv[ac]; } else { is_invalid = -1; break; } } else if(!strcmp("--output-sitemap", av) || !strcmp("-os", av)) { if(++ac < argc) { xml_sitemap = argv[ac]; } else { is_invalid = -1; break; } } } if(is_help || !directory) { usage(); return is_help || !is_invalid ? EXIT_SUCCESS : EXIT_FAILURE; } fprintf(stderr, "Changing directory to <%s>.\n", directory); if(chdir(directory)) { perror(directory); return EXIT_FAILURE; } /* make sure that umask is set so that others can read what we create "warning: passing argument 1 of 'umask' with different width due to prototype" <- umask's fault; can't change? */ umask(S_IWGRP | S_IWOTH); /* recursing; fixme: this should be configurable */ if(!Recursor(template_index, template_sitemap, template_newsfeed)) return EXIT_FAILURE; ret = RecursorGo(); Recursor_(); return ret ? EXIT_SUCCESS : EXIT_FAILURE; }