#include "headers.h"

/* really hate using the preprocessor, but it makes sense in this context */

/*
 * REGEX_URI splits a URI into scheme, authority, path, query and fragment.
 * It is adapted from the generic URI-splitting expression given in the RFC
 * describing URI syntax (presumably appendix B of RFC 3986 / RFC 2396 --
 * TODO confirm), tightened to require a "scheme://" prefix.  It is still
 * more liberal than it should be and needs to be made stricter in future.
 *
 * Capture groups:
 *   1 = scheme, 2 = authority + path, 3 = authority (host), 4 = path,
 *   5 = "?query", 6 = query, 7 = "#fragment", 8 = fragment
 */
#define REGEX_URI "^([^:/?#]+)://(([^/?#]*)+([^?#]*))(\\?([^#]*))?(#(.*))?"

/*
 * SUBSTR_COUNT is the number of parenthetical substrings in REGEX_URI plus
 * one (slot 0 holds the whole match): 8 groups + 1 = 9.
 */
#define SUBSTR_COUNT 9

/* Indices into the regmatch_t array for the pieces uri_parse() extracts. */
#define PROTO 1
#define FQDN 3
#define PATH 4

/*
 * uri_parse -- split a URI string into protocol, host name and path.
 *
 * uristr: NUL-terminated URI of the form "proto://host[/path][?query][#frag]".
 * res:    output structure; its proto, fqdn and path members are overwritten
 *         on success and left untouched on failure.
 *
 * Returns 0 on success, 1 if an argument is NULL, the pattern fails to
 * compile, or uristr does not match REGEX_URI.
 *
 * res->proto and res->fqdn (and res->path when a path is present) are the
 * values returned by substr_extract(); presumably these are freshly
 * allocated strings owned by the caller -- verify against substr_extract().
 */
int uri_parse(const char *uristr, uri *res)
{
    regex_t regexp;
    regmatch_t match[SUBSTR_COUNT];
    int rc;

    if (!uristr || !res) {
        return 1;
    }

    if (regcomp(&regexp, REGEX_URI, REG_EXTENDED) != 0) {
        return 1;
    }

    /*
     * The offsets are stored in our own match[] array, so the compiled
     * pattern can be released immediately after matching -- this keeps the
     * cleanup in one place for both the success and failure paths.
     */
    rc = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0);
    regfree(&regexp);
    if (rc != 0) {
        return 1;
    }

    res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
    res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);

    /*
     * An empty (or non-participating) path group means the URI had no path.
     * Default to "/", which always returns a root document from an HTTP
     * server -- and, presumably, from others.
     *
     * NOTE(review): this branch stores a string literal while the other
     * stores a substr_extract() result; if callers free res->path this
     * needs to become an allocated copy -- confirm the ownership contract.
     */
    if ((match[PATH].rm_eo - match[PATH].rm_so) < 1) {
        res->path = "/";
    } else {
        res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
    }

    return 0;
}