/* URI parsing built on POSIX regular expressions. */
#include "headers.h"
|
|
|
|
|
|
/*
 * URI-splitting pattern and the capture-group indices uri_parse() reads.
 *
 * The preprocessor is used reluctantly here, but it makes sense in this
 * context: the pattern and its group numbers stay side by side.
 *
 * The pattern comes from the RFC describing URI syntax (it resembles the
 * generic regex in RFC 3986, Appendix B -- TODO confirm the exact source).
 * It is more liberal than it should be and needs tightening in the future.
 *
 * Capture groups, numbered by opening parenthesis:
 *   1  text before "://"                               (PROTO)
 *   2  groups 3 and 4 together
 *   3  text up to the first '/', '?' or '#'            (FQDN)
 *   4  text up to the first '?' or '#'                 (PATH)
 *   5  '?' followed by group 6
 *   6  text up to the first '#'
 *   7  '#' followed by group 8
 *   8  remainder of the string
 *
 * NOTE(review): the '+' after group 3 repeats a group that can already
 * match the empty string; it looks unnecessary but is left untouched.
 */
#define REGEX_URI "^([^:/?#]+)://(([^/?#]*)+([^?#]*))(\\?([^#]*))?(#(.*))?"

/*
 * SUBSTR_COUNT is the number of parenthesised groups in REGEX_URI plus one
 * (slot 0 of the regmatch_t array holds the whole match): 8 + 1.
 */
#define SUBSTR_COUNT 9

#define PROTO 1
#define FQDN 3
#define PATH 4
|
|
|
|
|
|
/*
 * uri_parse -- split a URI string into its protocol, host and path parts.
 *
 * uristr: NUL-terminated text matched against REGEX_URI.
 * res:    destination; its proto, fqdn and path members are overwritten on
 *         success and left untouched on failure.
 *
 * Returns 0 on success.  Returns 1 if either argument is null, if the
 * pattern fails to compile, or if uristr does not match the pattern.
 *
 * proto and fqdn receive whatever substr_extract() returns for the matched
 * range.  NOTE(review): path is either a substr_extract() result or the
 * string literal "/", so a caller cannot release it unconditionally --
 * confirm how callers dispose of res->path.
 */
int uri_parse(const char *uristr, uri *res)
{
    regex_t regexp;
    regmatch_t match[SUBSTR_COUNT];
    int status;

    if (!uristr || !res)
    {
        return(1);
    }

    /* A pattern that failed to compile must not be handed to regexec(). */
    if (regcomp(&regexp, REGEX_URI, REG_EXTENDED) != 0)
    {
        return(1);
    }

    status = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0);

    /* match[] holds plain offsets into uristr, so the compiled pattern */
    /* can be released before they are used.                            */
    regfree(&regexp);

    if (status != 0)
    {
        return(1);
    }

    res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
    res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);

    /* If the path match is shorter than one character there is no path. */
    /* Fall back to "/", which asks an HTTP server for its root document */
    /* (and presumably works for other protocols as well).               */
    if ((match[PATH].rm_eo - match[PATH].rm_so) < 1)
    {
        res->path = "/";
    }
    else
    {
        res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
    }

    return(0);
}
|
|
|
|
|
|
|
|
|