#include "headers.h"

/*
 * The preprocessor is used here on purpose: the pattern and the indices of
 * the capture groups we care about belong together and are only meaningful
 * as a set.
 *
 * Capture groups of REGEX_URI (index 0 is the whole match):
 *   1  scheme including the trailing ':'
 *   2  scheme                                  (PROTO)
 *   3  authority including the leading "//"
 *   4  authority                               (FQDN)
 *   5  everything after the authority: path, plus "?query" and "#fragment"
 *      when present                            (PATH)
 *   6+ the individual path/query/fragment pieces (not used here)
 */
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
#define SUBSTR_COUNT 9 /* number of regmatch_t slots requested from regexec */
#define PROTO 2
#define FQDN 4
#define PATH 5

/*
 * uri_parse - split a URI string into protocol, host and path.
 *
 * uristr: NUL-terminated URI to parse.
 * res:    destination; on success its proto, fqdn and path members are set.
 *         On failure it is left untouched.
 *
 * Returns 0 on success and 1 on failure (NULL argument, the pattern failed
 * to compile, or regexec reported no match or an error).
 */
int uri_parse(const char *uristr, uri *res)
{
    int rc;
    regex_t regexp;
    regmatch_t match[SUBSTR_COUNT];
    char errbuf[BUFSIZ] = {0};

    if (uristr == NULL || res == NULL) {
        return 1;
    }

    rc = regcomp(&regexp, REGEX_URI, REG_EXTENDED);
    if (rc != 0) {
        regerror(rc, &regexp, errbuf, sizeof errbuf);
        fprintf(stderr, "regular expression error: %s\n", errbuf);
        return 1;
    }

    /*
     * The offsets stored in match[] index into uristr and do not depend on
     * the compiled pattern, so it can be released right after matching.
     * This also guarantees it is freed on every path from here on.
     */
    rc = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0);
    regfree(&regexp);

    /* Any non-zero result (not only REG_NOMATCH) leaves match[] unusable. */
    if (rc != 0) {
        return 1;
    }

    /*
     * NOTE(review): PROTO and FQDN are optional groups; when the URI has no
     * scheme or no "//authority" their rm_so/rm_eo are -1. Confirm that
     * substr_extract copes with that range.
     */
    res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
    res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);

    /*
     * An empty PATH span means the URI names no path at all. Fall back to
     * "/" so that a request still asks for the root document.
     */
    if ((match[PATH].rm_eo - match[PATH].rm_so) < 1) {
        /*
         * NOTE(review): this is a string literal, while the other branch
         * stores whatever substr_extract returns; confirm callers never
         * free or modify res->path.
         */
        res->path = "/";
    } else {
        res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
    }

    return 0;
}