apportate/src/uri.c

83 lines
2.6 KiB
C

#include "headers.h"
/* really hate using the preprocessor, but it makes sense in this context. */
/* SUBSTR_COUNT is the number of parenthesised groups in REGEX_URI plus one */
/* (slot 0 of the match array describes the whole match). */
/* REGEX_URI_RFC is the generic URI expression from RFC 3986, Appendix B. */
/* REGEX_URI is that expression with one extra group wrapped around */
/* path + query + fragment. It is a little too general for my tastes, but */
/* the one I came up with was trash (unsurprisingly) so here we are. */
/* Need to modify this in the future to be less liberal... */
/* Note: a literal '?' must be written "\\?" in C source so that the */
/* backslash actually reaches the regex engine. */
#define REGEX_URI_OLD "^([^:/?#]+)://(([^/?#]+)+([^?#]*))(\\?([^#]*))?(#(.*))?"
#define REGEX_URI_RFC "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
#define REGEX_URI_NEW "^(([^:/?#]+):)?(//([^/?#]+))?([^?#]*)(\\?([^#]*))?(#(.*))?"
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
/* REGEX_URI has 10 groups: */
/*   1 scheme with ':'   2 scheme      3 "//" + authority   4 authority */
/*   5 path+query+fragment   6 path    7 '?' + query        8 query */
/*   9 '#' + fragment   10 fragment */
#define SUBSTR_COUNT 11
/* group indices (into REGEX_URI) of the pieces uri_parse() extracts */
#define PROTO 2
#define FQDN 4
#define PATH 5
/*
 * uri_parse -- split a URI string into protocol, host and path.
 *
 * Matches uristr against REGEX_URI and stores the PROTO, FQDN and PATH
 * sub-matches (each obtained through substr_extract()) in res->proto,
 * res->fqdn and res->path.  An empty PATH match is replaced by "/".
 *
 * Returns 0 on success.  Returns 1 if an argument is NULL, if the
 * expression cannot be compiled (a message is printed on stderr), or if
 * regexec() reports anything other than a successful match.
 */
int uri_parse(const char *uristr, uri *res)
{
    int rc;
    regex_t regexp;
    regmatch_t match[SUBSTR_COUNT];
    char errbuf[BUFSIZ] = {0};

    if (uristr == NULL || res == NULL)
    {
        return(1);
    }

    rc = regcomp(&regexp, REGEX_URI, REG_EXTENDED);
    if (rc != 0)
    {
        regerror(rc, &regexp, errbuf, sizeof errbuf);
        fprintf(stderr, "regular expression error: %s\n", errbuf);
        return(1);
    }

    rc = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0);
    /* the offsets in match[] refer to uristr, not to the compiled */
    /* expression, so the expression can be released straight away. */
    regfree(&regexp);
    if (rc != 0)
    {
        /* REG_NOMATCH or an internal failure: match[] is not usable */
        return(1);
    }

    /* NOTE(review): the scheme and authority groups are optional; when */
    /* one of them does not take part in the match its offsets are -1. */
    /* Confirm that substr_extract() copes with that. */
    res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
    res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);

    /* if the difference below is less than 1, our path doesn't exist. */
    /* Compensate by setting it to '/' which will always return a root */
    /* document from an HTTP server -- and, presumably, others. */
    if ((match[PATH].rm_eo - match[PATH].rm_so) < 1)
    {
        /* Build the default through substr_extract() as well, so that */
        /* res->path comes from the same source on both branches instead */
        /* of sometimes pointing at a string literal (presumably the */
        /* extracted strings are heap-allocated -- verify against */
        /* substr_extract() and the code that releases a uri). */
        res->path = substr_extract("/", 0, 1);
    }
    else
    {
        /* PATH spans the path together with any query and fragment */
        res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
    }

    return(0);
}