apportate/src/uri.c

61 lines
1.8 KiB
C

#include "headers.h"
/* really hate using the preprocessor, but it makes sense in this context */
/* SUBSTR_COUNT is number of parenthetical substrings in REGEX_URI plus one */
/* this regex is from the RFC describing URI syntax -- can't recall the */
/* exact one right now. anyway, it's a little too general for my tastes, */
/* but the one I came up with was trash (unsurprisingly) so here we are. */
/* need to modify this in the future to be less liberal... */
#define REGEX_URI "^([^:/?#]+)://(([^/?#]*)+([^?#]*))(\\?([^#]*))?(#(.*))?"
#define SUBSTR_COUNT 8
#define PROTO 1
#define FQDN 3
#define PATH 4
/*
 * uri_parse -- split a URI string into its protocol, host and path parts.
 *
 * uristr: NUL-terminated URI text to match against REGEX_URI.
 * res:    destination; on success its proto, fqdn and path members are set.
 *
 * Returns 0 on success. Returns 1 if either argument is null, if the
 * pattern fails to compile, or if uristr does not match; res is left
 * untouched in all of those cases.
 *
 * proto and fqdn are produced by substr_extract() (defined elsewhere;
 * presumably it returns a freshly allocated copy of the given byte range --
 * verify before freeing).
 */
int uri_parse(const char *uristr, uri *res)
{
    regex_t regexp;
    regmatch_t match[SUBSTR_COUNT];
    int status;

    if (!uristr || !res) {
        return 1;
    }

    /* A failed compile leaves regexp unusable, so bail out before regexec. */
    if (regcomp(&regexp, REGEX_URI, REG_EXTENDED) != 0) {
        return 1;
    }

    status = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0);

    /* The offsets in match[] index into uristr, not into the compiled */
    /* pattern, so the pattern can be released right away on every path. */
    regfree(&regexp);

    if (status != 0) {
        return 1;
    }

    res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
    res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);

    /* An empty path group means the URI named no path. Fall back to "/" */
    /* which asks an HTTP server for its root document. */
    if ((match[PATH].rm_eo - match[PATH].rm_so) < 1) {
        /* NOTE(review): this is a string literal while the other members */
        /* come from substr_extract(); callers must not free or modify it. */
        /* Confirm how callers release a uri before changing this. */
        res->path = "/";
    } else {
        res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
    }

    return 0;
}