Compare commits

..

6 Commits

11 changed files with 267 additions and 211 deletions

12
CHANGES
View File

@ -1,3 +1,13 @@
2023/02/19 - v0.3.2
-Improved URI parsing again
-Improved HTTP redirect handling capability
-Moved TLS out of dial and into main
-Improved debugging experience by shuffling around if(param[V])s
2022/12/13 - v0.3.1b
-Fixed Gopher support
-Improved URI parsing
2022/12/12 - v0.3b
-Crashes in new, exciting ways!
-I couldn't get it to segfault, but ymmv.
@ -19,4 +29,4 @@
Features:
-Doesn't segfault. Probably.
-Has TLS.
-It's smaller and more readable than hurl. Nyaaah.
-It's smaller and more readable than hurl. Nyaaah

11
README
View File

@ -18,9 +18,11 @@ What's apportate's featureset, anyway?
======================================
Right now, it retrieves data over HTTP or Gopher, and can support TLS,
at least when it comes to HTTPS. I'd like a more general approach to
TLS support in the future.
TLS support in the future. You can build it with any implementation of
TLS so long as there's a libtls built atop it. Right now, that's Bear,
Libre, and Open at the least.
Its source code is small (< 400 lines of C total according to cloc right
Its source code is small (< 500 lines of C total according to cloc right
now) and uses only POSIX routines -- there's no temptation to use GNU
or BSD features because I develop against musl and tcc, and I don't even
have the Linux manpages on any of my systems.
@ -36,8 +38,5 @@ Apportate also aims to have actually useful diagnostics; that is,
compared to other tools, apportate aims to only provide useful error
output. In the case of success, it follows the Rule of Silence; on
unrecoverable errors, it aborts immediately. It supports multiple levels
of verbosity, and exposes almost all of its internals.
Its simple design and use should also make it relatively convenient
for inclusion in shell scripts.
of verbosity, and exposes almost all of its internal operations for debugging.

View File

@ -1,6 +1,8 @@
.POSIX:
LIBS = -ltls
SSL = bearssl
LIBS = -ltls -l$(SSL)
OBJ = \
main.c \

View File

@ -1,56 +1,26 @@
#include "headers.h"
/* dial - open a TCP connection to a host */
/* fqdn - host name (or numeric address) to connect to */
/* proto - service name or port number, handed to getaddrinfo() as the service */
/* returns a connected socket descriptor on success, 0 on any failure */
/* TLS is not handled here; the caller wraps the returned descriptor itself. */
int dial(const char *fqdn, const char *proto)
{
    int sd;
    struct addrinfo hints = {0};
    struct addrinfo *ainfo, *ai;

    /* let the resolver hand back IPv4 and IPv6 stream endpoints alike */
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    if(getaddrinfo(fqdn, proto, &hints, &ainfo))
    {
        return(0);
    }

    /* try every returned address until one accepts the connection; the socket */
    /* is created per candidate so its family always matches the address */
    sd = -1;
    for(ai = ainfo; NULL != ai; ai = ai->ai_next)
    {
        /* socket() reports failure with -1, not 0 */
        if(-1 == (sd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)))
        {
            continue;
        }

        if(0 == connect(sd, ai->ai_addr, ai->ai_addrlen))
        {
            break;
        }

        /* this candidate failed: don't leak its descriptor */
        close(sd);
        sd = -1;
    }

    freeaddrinfo(ainfo);

    if(-1 == sd)
    {
        return(0);
    }
    return(sd);
}

View File

@ -8,4 +8,4 @@
/* sd -- successful connection returns a file descriptor connected to the fqdn */
/* ERRCONN -- couldn't connect */
/* ERRADDR -- couldn't get addrinfo */
int dial(const char *fqdn, const char *proto, struct tls **tls_res);
int dial(const char *fqdn, const char *proto);

View File

@ -1,27 +1,27 @@
#include "headers.h"
/* printf-style template for an HTTP/1.0 GET request */
/* first %s - request path, second %s - value of the Host header */
const char REQ_HTTP[] =
{
    "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n"
};
/* reqgen_http - build an HTTP/1.0 GET request from the REQ_HTTP template */
/* path - resource to request */
/* fqdn - host name placed in the Host header */
/* nbuf - receives the heap-allocated request (NULL if allocation failed); caller frees */
/* returns 0 on success, ERRMEM if the buffer could not be allocated */
int reqgen_http(const char *path, const char *fqdn, char **nbuf)
{
    size_t buflen;

    /* the template's own "%s" pairs are counted as well, so the buffer is */
    /* a few bytes larger than the finished request needs */
    buflen = strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1;

    /* allocate exactly once; a second allocation would orphan the first */
    if( !(*nbuf = calloc(buflen, sizeof(char))))
    {
        return(ERRMEM);
    }

    snprintf(*nbuf, buflen, REQ_HTTP, path, fqdn);

    return(0);
}
int resp_parse_http(char *data)
{
@ -41,32 +41,51 @@ int resp_parse_http(char *data)
return(0);
}
/* header_name_is - case-insensitively compare a header field name with key */
/* name - start of the field name inside the response (not NUL-terminated) */
/* len - number of bytes in the field name */
/* returns 1 when the whole name equals the whole key ignoring case, else 0 */
static int header_name_is(const char *name, size_t len, const char *key)
{
    size_t i;

    if(strlen(key) != len)
    {
        return(0);
    }

    for(i = 0; i < len; i++)
    {
        if(tolower((unsigned char) name[i]) != tolower((unsigned char) key[i]))
        {
            return(0);
        }
    }
    return(1);
}

/* http_get_keyval - return the value corresponding to a key if it exists in an http response, otherwise null */
/* key - key to extract corresponding value */
/* data - response header from which to extract key's value */
/* the returned value is a heap copy owned by the caller */
char *http_get_keyval(char *key, char *data)
{
    const char *line, *eol, *colon, *val, *val_end;
    char *res;

    if(NULL == key || NULL == data)
    {
        return(NULL);
    }

    /* servers return mixed-case as well as single-case keys, so names are */
    /* compared without regard to case. comparing the full name (rather than */
    /* searching for a fragment such as "ocation") avoids false matches like */
    /* "Content-Location" when "Location" is wanted. */
    line = data;
    while('\0' != *line)
    {
        /* every scan is bounded by the end of the current line or of the buffer */
        for(eol = line; '\0' != *eol && '\n' != *eol; eol++);

        /* an empty line ends the header; never look for keys in the body */
        if(eol == line || (1 == (eol - line) && '\r' == *line))
        {
            break;
        }

        /* lines without a colon (e.g. the status line) are simply skipped */
        colon = memchr(line, ':', (size_t) (eol - line));
        if(NULL != colon && header_name_is(line, (size_t) (colon - line), key))
        {
            /* strip the blanks after the colon and the trailing CR/blanks */
            for(val = colon + 1; val < eol && (' ' == *val || '\t' == *val); val++);
            for(val_end = eol; val_end > val && (' ' == val_end[-1] || '\t' == val_end[-1] || '\r' == val_end[-1]); val_end--);

            if( !(res = calloc((size_t) (val_end - val) + 1, sizeof(char))))
            {
                return(NULL);
            }
            memcpy(res, val, (size_t) (val_end - val));
            return(res);
        }

        line = ('\n' == *eol) ? eol + 1 : eol;
    }

    return(NULL);
}

View File

@ -16,4 +16,4 @@ int resp_parse_http(char *data);
exit();
}
*/
char *http_header_extract(char *key, char *data);
char *http_get_keyval(char *key, char *data);

View File

@ -19,7 +19,10 @@ enum
/* 2: statistical. informational+size of header and body in bytes, round-trip time, etc */
/* 3+: debug. statistical+all internal state changes */
/* currently we only implement 1 */
V
V,
/* set redir limit (not currently implemented) */
R
};
enum
@ -36,7 +39,7 @@ char *outpath;
int main(int argc, char **argv)
{
int i, translen, redirnum;
int i, translen, redirnum, gotheader;
int sockd;
char *recvbufp;
char *sendbufp, *offsetp, *errstr;
@ -60,6 +63,7 @@ int main(int argc, char **argv)
{
goto usage;
}
for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
{
switch(i)
@ -80,7 +84,7 @@ int main(int argc, char **argv)
break;
case 'v':
/* we handle v differently because we want to support different levels of verbosity */
/* we handle v differently because we want to support different levels of verbosity */
if(!param[Q])
{
param[V]++;
@ -88,7 +92,7 @@ int main(int argc, char **argv)
break;
default:
goto usage;
goto usage;
}
}
urip = argv[optind];
@ -125,26 +129,37 @@ int main(int argc, char **argv)
}
/* todo: init uristruct as well */
uristruct.proto = NULL;
uristruct.fqdn = NULL;
uristruct.path = NULL;
if( uri_parse(urip, &uristruct))
{
errstr = "couldn't parse URI.";
goto err;
}
if(param[V])
if(param[V] >= 2)
{
fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
if(param[V] >= 3)
{
fprintf(stderr, "length of proto: %lu\nlength of fqdn: %lu\nlength of path: %lu\n", strlen(uristruct.proto), strlen(uristruct.fqdn), strlen(uristruct.path));
}
}
sendbufp = reqgen(&uristruct);
/* we should probably modify uri_parse to return a zero value on failure... */
/* make errstr display the URI -- need mastrcat? */
if( NULL == (sendbufp = reqgen(&uristruct)))
{
errstr = "couldn't generate request. Unknown protocol?";
goto err;
fprintf(stderr, "couldn't generate request. Unknown protocol: %s?\n", uristruct.proto);
exit(-1);
}
/* if outpath isn't set because we haven't received a -o param, */
@ -158,57 +173,73 @@ int main(int argc, char **argv)
outpath = "default";
}
/* todo: handle TLS a little better. use a global variable, move TLS handling out of dial? */
if(param[V])
{
fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
}
/* definitely going to break tls out of dial, this is really bad */
if(!strcmp("https", uristruct.proto))
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto)))
{
if(!dial(uristruct.fqdn, uristruct.proto, &tlsc))
errstr = "failed to connect";
goto err;
}
/* todo: upgrade this to a more general mechanism */
if(!strncmp("https", uristruct.proto, 5))
{
if(NULL != tlsc)
{
errstr = "failed to connect";
goto err;
tls_reset(tlsc);
}
if(param[V])
struct tls_config *config = tls_config_new();
tlsc = tls_client();
tls_configure(tlsc, config);
if(-1 == tls_connect_socket(tlsc, sockd, uristruct.fqdn))
{
fprintf(stderr, "setting up TLS...\n");
}
}
else
{
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto, NULL)))
{
errstr = "failed to connect";
errstr = "failed to upgrade connection to use TLS, aborting\n";
goto err;
}
}
if(param[V])
if(param[V] >= 2)
{
fprintf(stderr, "Sending request...\n %s\n", sendbufp);
fprintf(stderr, "Sending request...\n-----REQUEST START-----\n%s\n-----REQUEST END-----\n", sendbufp);
}
if(NULL != tlsc)
{
i = tls_write(tlsc, sendbufp, strlen(sendbufp));
if(param[V])
{
fprintf(stderr, "writing over tls...\n");
}
if( -1 == (i = tls_write(tlsc, sendbufp, strlen(sendbufp))))
{
fprintf(stderr, "libtls internal error: ");
errstr = (char *) tls_error(tlsc);
goto err;
}
}
else
{
if(param[V])
{
fprintf(stderr, "writing over socket...\n");
}
i = send(sockd, sendbufp, strlen(sendbufp), 0);
}
if(param[V])
if(param[V] >= 3)
{
fprintf(stderr, "sent: %d bytes\n", i);
}
/* actual read loop */
int gotheader;
for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
{
if(NULL == tlsc)
@ -220,13 +251,13 @@ int main(int argc, char **argv)
translen = tls_read(tlsc, recvbufp, BUFSIZ);
}
if(param[V])
if(param[V] >= 3)
{
fprintf(stderr, "recv: %d bytes\n", translen);
}
/* parsing here? */
if(!gotheader)
if(!strncmp(uristruct.proto, "http", 4) && !gotheader)
{
switch(resp_parse_http(recvbufp))
{
@ -241,7 +272,7 @@ int main(int argc, char **argv)
case 200:
/* by now we have the first transmission from the server that we actually care about */
/* we just need to get to the end of the headers, now that we're done with 'em */
if(param[V])
if(param[V] >= 3)
{
fprintf(stderr, "200 OKAY, moving to end of header...\n");
}
@ -249,7 +280,11 @@ int main(int argc, char **argv)
for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
{
fprintf(stderr, "Searching to end of header...\n");
if(param[V] >= 2)
{
fprintf(stderr, "Searching to end of header...\n");
}
if(NULL != tlsc)
{
tls_read(tlsc, recvbufp, BUFSIZ);
@ -270,9 +305,14 @@ int main(int argc, char **argv)
/* intentional drop through from 301 to 302 */
case 301:
case 302:
urip = http_header_extract("ocation", recvbufp);
fprintf(stderr, "R E D I R E C T I N G ! ! !\n");
urip = http_get_keyval("Location", recvbufp);
if(param[V])
{
fprintf(stderr, "Redirecting to %s...\n", urip);
}
redirnum++;
/* could use continue, but structured programming makes it easier to use goto in this circumstance... */
goto start;
case 400:
@ -294,10 +334,8 @@ int main(int argc, char **argv)
}
i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
if(param[V] >= 2)
fprintf(stdout, "%s", recvbufp);
if(param[V])
if(param[V] >= 3)
{
fprintf(stderr, "fwrite: %d bytes\n", i);
}
@ -305,6 +343,7 @@ int main(int argc, char **argv)
offsetp = recvbufp;
}
tls_free(tlsc);
close(sockd);
fclose(filed);
free(sendbufp);
@ -324,3 +363,4 @@ int main(int argc, char **argv)
fprintf(stderr, "%s: %s\n", argv[0], errstr);
exit(EXIT_FAILURE);
}

View File

@ -2,40 +2,38 @@
/* reqgen - return a properly formatted request for any implemented protocol */
/* urip - parsed URI; its proto field selects the request format */
/* returns the request produced by the protocol's generator, or NULL when the */
/* protocol is not implemented or the generator produced nothing */
char *reqgen(uri *urip)
{
    char *req;

    /* start from NULL so a generator that fails without touching req */
    /* cannot leave us returning an indeterminate pointer */
    req = NULL;

    if(NULL == urip || NULL == urip->proto)
    {
        return(NULL);
    }

    if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
    {
        reqgen_http(urip->path, urip->fqdn, &req);
    }
    else if(!strcmp(urip->proto, "gopher"))
    {
        reqgen_gopher(urip->path, &req);
    }

    return(req);
}
/* takes a data buffer and returns an integer corresponding to the server's response value */
/* if not applicable, return -1 */
/* if not found, return 0 */
/* TODO: fix erroneous implementation */
int resp_parse(char *data, uri *uristruct)
{
if(strncmp("http", uristruct->proto, 4))
@ -47,25 +45,39 @@ int resp_parse(char *data, uri *uristruct)
return(-1);
}
}
}
/* substr_extract - return a pointer to a character array on the heap consisting */
/* of all bytes between start (inclusive) and end (exclusive) in str. */
/* str - source buffer */
/* start, end - byte offsets into str */
/* the result is always NUL-terminated and owned by the caller. an empty, */
/* negative or inverted range (such as the -1/-1 offsets regexec() reports for */
/* a group that did not take part in the match) yields an empty string. */
/* returns NULL if str is NULL or memory could not be allocated. */
char *substr_extract(const char *str, int start, int end)
{
    size_t substr_len;
    char *substr;

    if(NULL == str)
    {
        return(NULL);
    }

    substr_len = (start >= 0 && end > start) ? (size_t) (end - start) : 0;

    /* one extra, zeroed byte serves as the terminator */
    if( !(substr = calloc(substr_len + 1, sizeof(char))))
    {
        return(NULL);
    }

    if(substr_len > 0)
    {
        memcpy(substr, str + start, substr_len);
    }
    return(substr);
}
/* buftolower - return a lower-cased heap copy of a string */
/* bufp - NUL-terminated string to convert; it is left unmodified */
/* returns a new NUL-terminated string owned by the caller, or NULL if bufp */
/* is NULL or memory could not be allocated */
char *buftolower(char *bufp)
{
    size_t i, len;
    char *nbufp;

    if(NULL == bufp)
    {
        return(NULL);
    }

    len = strlen(bufp);

    /* len + 1: leave room for the terminator, which calloc has already zeroed */
    if( !(nbufp = calloc(len + 1, sizeof(char))))
    {
        return(NULL);
    }

    for(i = 0; i < len; i++)
    {
        /* tolower() is only defined for unsigned char values and EOF */
        nbufp[i] = (char) tolower((unsigned char) bufp[i]);
    }
    return(nbufp);
}

View File

@ -37,3 +37,4 @@ typedef struct
char *reqgen(uri *urip);
int resp_parse(char *data, uri *uristruct);
char *substr_extract(const char *str, int start, int end);
char *buftolower(char *bufp);

View File

@ -2,59 +2,62 @@
/* really hate using the preprocessor, but it makes sense in this context */
/* REGEX_URI is the generic URI regex from RFC 3986 (Appendix B), with one */
/* extra group wrapped around path, query and fragment so that they come */
/* out together as a single substring. it's a little too general for my */
/* tastes; need to modify this in the future to be less liberal... */
/* SUBSTR_COUNT is the number of regmatch_t slots regexec() is asked to fill. */
/* it only has to exceed the highest index used (PATH); the expression has */
/* ten groups in total, and those past the ninth slot are never read. */
/* PROTO, FQDN and PATH are the group numbers of the scheme, the authority */
/* and the combined path/query/fragment respectively. */
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"

#define SUBSTR_COUNT 9
#define PROTO 2
#define FQDN 4
#define PATH 5
/* uri_parse - split a URI string into protocol, host and path */
/* uristr - URI to parse */
/* res - receives the pieces in its proto, fqdn and path fields */
/* returns 0 on success; 1 if the expression fails to compile, the URI does */
/* not match, or memory runs out (res then holds no allocations) */
/* NOTE: when the URI carries no path, res->path points at the string */
/* literal "/" rather than at heap memory, so it must not be freed. */
int uri_parse(const char *uristr, uri *res)
{
    int regerrcode;
    regex_t regexp;
    regmatch_t match[SUBSTR_COUNT];
    char errbuf[BUFSIZ] = {0};

    if(NULL == uristr || NULL == res)
    {
        return(1);
    }

    if( (regerrcode = regcomp(&regexp, REGEX_URI, REG_EXTENDED)))
    {
        regerror(regerrcode, &regexp, errbuf, BUFSIZ);
        fprintf(stderr, "regular expression error: %s\n", errbuf);
        return(1);
    }

    /* the offsets in match stay valid after the compiled expression is */
    /* released, so free it right away instead of on every exit path */
    regerrcode = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0);
    regfree(&regexp);

    /* any non-zero result (not only REG_NOMATCH) leaves match unusable */
    if(regerrcode)
    {
        return(1);
    }

    res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
    res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);

    /* if the difference below is less than 1, our path doesn't exist. */
    /* Compensate by setting it to '/' which will always return a root */
    /* document from an HTTP server -- and, presumably, others. */
    if((match[PATH].rm_eo - match[PATH].rm_so) < 1)
    {
        res->path = "/";
    }
    else
    {
        res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
    }

    if(NULL == res->proto || NULL == res->fqdn || NULL == res->path)
    {
        /* out of memory part-way through: hand back nothing at all. path */
        /* is only heap memory when it was extracted, i.e. when it is not */
        /* the "/" literal, which is the case exactly when it is NULL here */
        free(res->proto);
        free(res->fqdn);
        res->proto = NULL;
        res->fqdn = NULL;
        if((match[PATH].rm_eo - match[PATH].rm_so) >= 1)
        {
            free(res->path);
        }
        res->path = NULL;
        return(1);
    }

    return(0);
}