Compare commits

...

6 Commits

11 changed files with 535 additions and 283 deletions

21
CHANGES
View File

@ -1,3 +1,22 @@
2023/02/19 - v0.3.2
-Improved URI parsing again
-Improved HTTP redirect handling capability
-Moved TLS out of dial and into main
-Improved debugging experience by shuffling around if(param[V])s
2022/12/13 - v0.3.1b
-Fixed Gopher support
-Improved URI parsing
2022/12/12 - v0.3b
-Crashes in new, exciting ways!
-I couldn't get it to segfault, but ymmv.
-Has TLS still. It's less trash this time but still not great.
-Still smaller than hurl, still more readable.
-Still has Gopher, but the new main() breaks support. Will fix.
-Apparently doesn't work on BSD. Will get around to this.
-<<<!!!NEW!!!>>> Has HTTP redirect capabilities. Woaoaoaoao.
2022/12/7 - v0.2b
-It did segfault. Segfaults less now.
-Still has TLS.
@ -10,4 +29,4 @@
Features:
-Doesn't segfault. Probably.
-Has TLS.
-It's smaller and more readable than hurl. Nyaaah.
-It's smaller and more readable than hurl. Nyaaah

11
README
View File

@ -18,9 +18,11 @@ What's apportate's featureset, anyway?
======================================
Right now, it retrieves data over HTTP or Gopher, and can support TLS,
at least when it comes to HTTPS. I'd like a more general approach to
TLS support in the future.
TLS support in the future. You can build it against any TLS implementation
that has a libtls built atop it. Right now, that's BearSSL, LibreSSL, and
OpenSSL at the least.
Its source code is small (< 400 lines of C total according to cloc right
Its source code is small (< 500 lines of C total according to cloc right
now) and uses only POSIX routines -- there's no temptation to use GNU
or BSD features because I develop against musl and tcc, and I don't even
have the Linux manpages on any of my systems.
@ -36,8 +38,5 @@ Apportate also aims to have actually useful diagnostics; that is,
compared to other tools, apportate aims to only provide useful error
output. In the case of success, it follows the Rule of Silence; on
unrecoverable errors, it aborts immediately. It supports multiple levels
of verbosity, and exposes almost all of its internals.
Its simple design and use should also make it relatively convenient
for inclusion in shell scripts.
of verbosity, and exposes almost all of its internal operations for debugging.

View File

@ -1,6 +1,8 @@
.POSIX:
LIBS = -ltls
SSL = bearssl
LIBS = -ltls -l$(SSL)
OBJ = \
main.c \

View File

@ -1,55 +1,26 @@
#include "headers.h"
/* dial -- open a TCP connection to a host */
/* fqdn -- host name or address to resolve */
/* proto -- service name or port string handed to getaddrinfo, e.g. "http" */
/* return value -- connected socket descriptor on success, 0 on any failure */
/* TLS is not negotiated here; a caller that wants it wraps the returned descriptor itself. */
int dial(const char *fqdn, const char *proto)
{
	int sd;
	struct addrinfo hints = {0};
	struct addrinfo *ainfo, *aip;

	if(NULL == fqdn || NULL == proto)
	{
		return(0);
	}

	/* let the resolver hand back every stream-capable address, v4 or v6 */
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;

	if(getaddrinfo(fqdn, proto, &hints, &ainfo))
	{
		return(0);
	}

	/* try each candidate in order; the socket is created per candidate so */
	/* that its family always matches the address we are connecting to */
	for(sd = -1, aip = ainfo; NULL != aip; aip = aip->ai_next)
	{
		/* socket() reports failure with -1, never 0 */
		if(-1 == (sd = socket(aip->ai_family, aip->ai_socktype, aip->ai_protocol)))
		{
			continue;
		}
		if(0 == connect(sd, aip->ai_addr, aip->ai_addrlen))
		{
			break;
		}
		/* this candidate failed: release the descriptor before moving on */
		close(sd);
		sd = -1;
	}

	freeaddrinfo(ainfo);

	if(-1 == sd)
	{
		return(0);
	}
	return(sd);
}

View File

@ -8,4 +8,4 @@
/* sd -- successful connection returns a file descriptor connected to the fqdn */
/* ERRCONN -- couldn't connect */
/* ERRADDR -- couldn't get addrinfo */
int dial(const char *fqdn, const char *proto, struct tls **tls_res);
int dial(const char *fqdn, const char *proto);

View File

@ -1,24 +1,91 @@
#include "headers.h"
/* HTTP/1.0 request template: the first %s is the request path, the second the host name */
const char REQ_HTTP[] = "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n";
/* reqgen_http -- build an HTTP GET request from REQ_HTTP */
/* path -- request path placed after "GET" */
/* fqdn -- host name placed in the Host header */
/* nbuf -- receives a heap-allocated, null-terminated request (caller frees); set to NULL on failure */
/* return value -- 0 on success, ERRMEM if the buffer could not be allocated */
int reqgen_http(const char *path, const char *fqdn, char **nbuf)
{
	size_t buflen;

	/* the template length still counts both "%s" pairs, so this is a few */
	/* bytes more than strictly needed; the +1 is for the terminator */
	buflen = strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1;

	/* allocate exactly once -- a second allocation here would orphan the first */
	if( !(*nbuf = calloc(buflen, sizeof(char))))
	{
		return(ERRMEM);
	}

	snprintf(*nbuf, buflen, REQ_HTTP, path, fqdn);

	return(0);
}
/* resp_parse_http -- extract the numeric status code from an HTTP response */
/* data -- null-terminated buffer starting at the status line, e.g. "HTTP/1.1 200 OK" */
/* return value -- the status code, or 0 if data is NULL or too short to hold one */
int resp_parse_http(char *data)
{
	/* there are nine characters between the start of data and the first */
	/* character of the three-digit response code ("HTTP/x.y ") */
	enum { CODE_OFFSET = 9, CODE_LEN = 3 };
	char code[CODE_LEN + 1] = {0};
	int i;

	if(NULL == data)
	{
		return(0);
	}

	/* make sure the buffer really extends through the code before reading it */
	for(i = 0; i < CODE_OFFSET + CODE_LEN; i++)
	{
		if('\0' == data[i])
		{
			return(0);
		}
	}

	/* a small stack buffer replaces the old heap scratch buffer, which was never freed */
	memcpy(code, data + CODE_OFFSET, CODE_LEN);

	return((int) strtol(code, NULL, 10));
}
/* http_key_matches -- case-insensitively compare key against the first len bytes of line */
/* returns non-zero only when key is exactly len characters long and every character matches */
static int http_key_matches(const char *key, const char *line, size_t len)
{
	size_t i;

	for(i = 0; i < len; i++)
	{
		if('\0' == key[i] || tolower((unsigned char) key[i]) != tolower((unsigned char) line[i]))
		{
			return(0);
		}
	}
	return('\0' == key[len]);
}

/* http_get_keyval - return the value corresponding to a key if it exists in an http response header, otherwise NULL */
/* key - header field name to look up, without the colon. compared case-insensitively because */
/*       some servers return mixed-case keys and others single-case; the whole name must match, */
/*       so "ocation" does not match "Location" */
/* data - null-terminated response from which to extract key's value. only the lines before the */
/*        blank line that ends the header are searched */
/* return value - heap-allocated, null-terminated copy of the value with surrounding blanks and */
/*                the trailing carriage return removed (caller frees), or NULL if the key is */
/*                absent, an argument is NULL, or memory could not be allocated */
char *http_get_keyval(char *key, char *data)
{
	const char *line, *eol, *colon, *val, *vend;
	char *buf;

	if(NULL == key || NULL == data)
	{
		return(NULL);
	}

	/* walk the header one line at a time; eol always stops on '\n' or the terminator */
	for(line = data; '\0' != *line; line = ('\n' == *eol) ? eol + 1 : eol)
	{
		for(eol = line; '\0' != *eol && '\n' != *eol; eol++);

		/* an empty line ("\n" or "\r\n") marks the end of the header */
		if(eol == line || (1 == (eol - line) && '\r' == *line))
		{
			break;
		}

		/* lines without a colon (the status line, for one) carry no key */
		colon = memchr(line, ':', (size_t) (eol - line));
		if(NULL == colon || !http_key_matches(key, line, (size_t) (colon - line)))
		{
			continue;
		}

		/* the value runs from just past the colon to the end of the line, */
		/* minus optional leading blanks and the trailing "\r" */
		for(val = colon + 1; val < eol && (' ' == *val || '\t' == *val); val++);
		for(vend = eol; vend > val && ('\r' == vend[-1] || ' ' == vend[-1] || '\t' == vend[-1]); vend--);

		if(NULL == (buf = calloc((size_t) (vend - val) + 1, sizeof(char))))
		{
			return(NULL);
		}
		memcpy(buf, val, (size_t) (vend - val));
		return(buf);
	}

	return(NULL);
}

View File

@ -1,2 +1,19 @@
int reqgen_http(const char *path, const char *fqdn, char **nbuf);int http_content_length(char *data);
char *http_skip_header(char *data, int recvlen);
int resp_parse_http(char *data);
/* http_get_keyval -- function to return value corresponding to key */
/* key -- null-terminated buffer containing a key to retrieve a value of */
/* data -- null-terminated buffer containing a valid http response header */
/* return value -- NULL on failure, otherwise value corresponding to key in null-terminated buffer */
/* example -- (uri_parse returns 0 on success)
if(0 == uri_parse(http_get_keyval("Location", recvbuf), &uri))
{
printf("redirecting to: %s\n", uri.fqdn);
}
else
{
printf("failed to find a valid redirect. bailing out\n");
exit(EXIT_FAILURE);
}
*/
char *http_get_keyval(char *key, char *data);

View File

@ -19,7 +19,17 @@ enum
/* 2: statistical. informational+size of header and body in bytes, round-trip time, etc */
/* 3+: debug. statistical+all internal state changes */
/* currently we only implement 1 */
V
V,
/* set redir limit (not currently implemented) */
R
};
/* REDIR_LIM -- upper bound on the redirect counter that main's outer loop */
/* compares against; the counter is bumped once per 301/302 response */
enum
{
/* change this to alter how many redirect attempts should be tried */
/* todo: make this a command-line parameter? */
REDIR_LIM = 10
};
@ -29,17 +39,18 @@ char *outpath;
int main(int argc, char **argv)
{
int i, translen, gotheader;
int i, translen, redirnum, gotheader;
int sockd;
char *recvbuf;
char *recvbufp;
char *sendbufp, *offsetp, *errstr;
char *urip;
FILE *filed;
struct tls *tlsc;
uri uristruct;
i = translen = sockd = 0;
sendbufp = offsetp = 0;
if( (recvbuf = malloc(BUFSIZ+1)) == NULL)
if( (recvbufp = malloc(BUFSIZ+1)) == NULL)
{
errstr = "failed to init";
goto err;
@ -48,7 +59,11 @@ int main(int argc, char **argv)
filed = 0;
tlsc = NULL;
if(argc == 1)
{
goto usage;
}
for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
{
switch(i)
@ -69,7 +84,7 @@ int main(int argc, char **argv)
break;
case 'v':
/* we handle v differently because we want to support different levels of verbosity */
/* we handle v differently because we want to support different levels of verbosity */
if(!param[Q])
{
param[V]++;
@ -77,140 +92,275 @@ int main(int argc, char **argv)
break;
default:
errstr = "[-vb] [-q] [-o <file>] uri";
goto usage;
}
}
urip = argv[optind];
/* we should probably modify uri_parse to return a zero value on failure... */
if(uri_parse(argv[optind], &uristruct))
for(redirnum = 0; redirnum < REDIR_LIM;)
{
errstr = "[-q|[-v -b]] [-o <file>] uri";
goto usage;
}
/* if outpath isn't set because we haven't received a -o param, */
/* then we should set the outpath to the final component of the */
/* URI. */
/* If we can't do that, we should just set it to 'default'. */
if(NULL == outpath && !strlen((outpath = (1 + strrchr(uristruct.path, '/')))))
{
outpath = "default";
}
/* generate our request */
if( !(sendbufp = reqgen(&uristruct)))
{
errstr = "Unknown protocol.";
goto err;
}
if(param[V])
{
fprintf(stderr, "request: %s\n", sendbufp);
}
/* having a routine or a global variable to track whether we need TLS would be */
/* nice to add in the future */
/* could probably store whether tls is necessary in a char within uristruct... */
/* Note: at the moment, if the TLS pointer passed is non-NULL, */
/* dial's return code can only be treated as an indicator of success. */
if(!strcmp("https", uristruct.proto))
{
sockd = dial(uristruct.fqdn, uristruct.proto, &tlsc);
}
else
{
sockd = dial(uristruct.fqdn, uristruct.proto, NULL);
}
if(!sockd)
{
errstr = "Couldn't connect to the host using the specified protocol.";
goto err;
}
if(tlsc)
{
translen = tls_write(tlsc, sendbufp, strlen(sendbufp));
}
else
{
translen = send(sockd, sendbufp, strlen(sendbufp), 0);
}
if(translen)
{
if(param[V])
{
printf("send: %d bytes\n", translen);
}
}
else
{
errstr = "Couldn't transmit data.";
goto err;
}
/* now for a slightly more complex version of the same routine: until we've encountered the */
/* delimiter, "\r\n\r\n", don't write to disk. Once we have, calculate the size of the body, */
/* then write that number of bytes to disk starting from the offset. Then, write everything */
/* until end of transmission. */
for(gotheader = 0, offsetp = NULL, translen = 1; translen > 0; memset(recvbuf, 0, BUFSIZ+1))
{
if( NULL == tlsc)
{
printf("recv: %d bytes\n", translen = recv(sockd, recvbuf, BUFSIZ, 0));
}
else
{
printf("recv: %d bytes\n", translen = tls_read(tlsc, recvbuf, BUFSIZ));
}
/* if we haven't gotten the header and our delimiter isn't in the */
/* received string, we're getting a multipart header and need to */
/* skip over it. */
/* right now we assume HTTP/S -- this is undesirable... */
if( strcmp("gopher", uristruct.proto) && !gotheader)
{
if( NULL == (offsetp = strstr(recvbuf, "\r\n\r\n")))
{
printf("continuing...\n");
continue;
}
else
{
gotheader = 1;
offsetp += 4;
printf("got the header\n");
}
}
start:
if( !filed && !(filed = fopen(outpath, "w")))
if(NULL != tlsc)
{
errstr = "couldn't open file";
tls_free(tlsc);
tlsc = NULL;
}
/* I don't care *what* you say, the system should *never* in a sane context return 0, 1, or 2 as a valid file descriptor */
/* libc reserves these */
if(2 < sockd)
{
close(sockd);
sockd = 0;
}
if(filed)
{
fclose(filed);
filed = NULL;
}
if(sendbufp)
{
free(sendbufp);
sendbufp = NULL;
}
/* todo: init uristruct as well */
uristruct.proto = NULL;
uristruct.fqdn = NULL;
uristruct.path = NULL;
if( uri_parse(urip, &uristruct))
{
errstr = "couldn't parse URI.";
goto err;
}
fwrite(offsetp, sizeof(char), (translen - (offsetp - recvbuf)), filed);
offsetp = recvbuf;
if(param[V] >= 2)
{
fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
if(param[V] >= 3)
{
fprintf(stderr, "length of proto: %lu\nlength of fqdn: %lu\nlength of path: %lu\n", strlen(uristruct.proto), strlen(uristruct.fqdn), strlen(uristruct.path));
}
}
sendbufp = reqgen(&uristruct);
/* we should probably modify uri_parse to return a zero value on failure... */
if( NULL == (sendbufp = reqgen(&uristruct)))
{
fprintf(stderr, "couldn't generate request. Unknown protocol: %s?\n", uristruct.proto);
exit(-1);
}
/* if outpath isn't set because we haven't received a -o param, */
/* then we should set the outpath to the final component of the */
/* URI. */
/* If we can't do that, we should just set it to 'default'. */
/* todo: maybe add a check to ensure we don't overwrite? but if */
/* the user tells us to, who are we to question them? */
if(NULL == outpath && !strlen((outpath = (1 + strrchr(uristruct.path, '/')))))
{
outpath = "default";
}
if(param[V])
{
fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
}
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto)))
{
errstr = "failed to connect";
goto err;
}
/* todo: upgrade this to a more general mechanism */
if(!strncmp("https", uristruct.proto, 5))
{
if(NULL != tlsc)
{
tls_reset(tlsc);
}
struct tls_config *config = tls_config_new();
tlsc = tls_client();
tls_configure(tlsc, config);
if(-1 == tls_connect_socket(tlsc, sockd, uristruct.fqdn))
{
errstr = "failed to upgrade connection to use TLS, aborting\n";
goto err;
}
}
if(param[V] >= 2)
{
fprintf(stderr, "Sending request...\n-----REQUEST START-----\n%s\n-----REQUEST END-----\n", sendbufp);
}
if(NULL != tlsc)
{
if(param[V])
{
fprintf(stderr, "writing over tls...\n");
}
if( -1 == (i = tls_write(tlsc, sendbufp, strlen(sendbufp))))
{
fprintf(stderr, "libtls internal error: ");
errstr = (char *) tls_error(tlsc);
goto err;
}
}
else
{
if(param[V])
{
fprintf(stderr, "writing over socket...\n");
}
i = send(sockd, sendbufp, strlen(sendbufp), 0);
}
if(param[V] >= 3)
{
fprintf(stderr, "sent: %d bytes\n", i);
}
/* actual read loop */
for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
{
if(NULL == tlsc)
{
translen = recv(sockd, recvbufp, BUFSIZ, 0);
}
else
{
translen = tls_read(tlsc, recvbufp, BUFSIZ);
}
if(param[V] >= 3)
{
fprintf(stderr, "recv: %d bytes\n", translen);
}
/* parsing here? */
if(!strncmp(uristruct.proto, "http", 4) && !gotheader)
{
switch(resp_parse_http(recvbufp))
{
case -1:
if(param[V])
{
fprintf(stderr, "Response header parsing unnecessary, moving on...\n");
}
break;
case 200:
/* by now we have the first transmission from the server that we actually care about */
/* we just need to get to the end of the headers, now that we're done with 'em */
if(param[V] >= 3)
{
fprintf(stderr, "200 OKAY, moving to end of header...\n");
}
for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
{
if(param[V] >= 2)
{
fprintf(stderr, "Searching to end of header...\n");
}
if(NULL != tlsc)
{
tls_read(tlsc, recvbufp, BUFSIZ);
}
else
{
recv(sockd, recvbufp, BUFSIZ, 0);
}
}
/* move forward four to get past the delimiter */
/* todo: add error checking in the case of never receiving a delimiter */
offsetp += 4;
gotheader = 1;
break;
/* intentional drop through from 301 to 302 */
case 301:
case 302:
urip = http_get_keyval("Location", recvbufp);
if(param[V])
{
fprintf(stderr, "Redirecting to %s...\n", urip);
}
redirnum++;
/* could use continue, but structured programming makes it easier to use goto in this circumstance... */
goto start;
case 400:
errstr = "400 Bad Request. Internal apport error?";
goto err;
default:
fprintf(stdout, "%s", recvbufp);
errstr = "invalid response from server.";
goto err;
}
}
if( !filed && NULL == (filed = fopen(outpath, "w")))
{
errstr = "couldn't open file.";
goto err;
}
i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
if(param[V] >= 3)
{
fprintf(stderr, "fwrite: %d bytes\n", i);
}
offsetp = recvbufp;
}
tls_free(tlsc);
close(sockd);
fclose(filed);
free(sendbufp);
free(recvbufp);
break;
}
free(recvbuf);
close(sockd);
fclose(filed);
exit(EXIT_SUCCESS);
usage:
fprintf(stderr, "usage: %s [-qvb] [-o file] uri\n", argv[0]);
exit(EXIT_FAILURE);
err:
fprintf(stderr, "%s: %s\n", argv[0], errstr);
exit(EXIT_FAILURE);
usage:
fprintf(stderr, "usage: %s: %s\n", argv[0], errstr);
exit(EXIT_FAILURE);
}

View File

@ -2,59 +2,82 @@
/* reqgen -- return a properly formatted request for any implemented protocol */
/* urip -- parsed URI; proto selects the generator, path (and fqdn for HTTP) fill in the request */
/* return value -- heap-allocated request string (caller frees), or NULL when the protocol */
/*                 is not implemented or the generator could not produce a request */
char *reqgen(uri *urip)
{
	/* start from NULL so that a generator which fails before storing a */
	/* buffer leaves us returning NULL rather than an indeterminate pointer */
	char *req = NULL;

	if(NULL == urip || NULL == urip->proto)
	{
		return(NULL);
	}

	if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
	{
		reqgen_http(urip->path, urip->fqdn, &req);
	}
	else if(!strcmp(urip->proto, "gopher"))
	{
		reqgen_gopher(urip->path, &req);
	}

	return(req);
}
/* throw -- print the numeric error code to stderr, then terminate the */
/* process using that same code as the exit status */
void throw(int errcode)
{
	const int status = errcode;

	fprintf(stderr, "apport: error %d.\n", status);
	exit(status);
}
/* resp_parse -- takes a data buffer and returns an integer corresponding to the server's response value */
/* data -- null-terminated response received from the server */
/* uristruct -- parsed URI whose proto decides whether a response code exists at all */
/* return value -- the response code for http/https, 0 if none was found, */
/*                 -1 if not applicable (any other protocol, or missing arguments) */
int resp_parse(char *data, uri *uristruct)
{
	if(NULL == data || NULL == uristruct || NULL == uristruct->proto)
	{
		return(-1);
	}

	/* strncmp returns 0 on a match; comparing four characters covers both "http" and "https" */
	if(0 == strncmp("http", uristruct->proto, 4))
	{
		return(resp_parse_http(data));
	}

	return(-1);
}
/* substr_extract -- return a pointer to a character array on the heap consisting of */
/* all bytes between start (inclusive) and end (exclusive) in str. */
/* str -- buffer to copy from; must be at least end bytes long */
/* start, end -- byte offsets into str. a negative start or an end that does not lie */
/*               past start (as regexec reports for groups that did not match) */
/*               yields an empty string instead of reading outside str */
/* return value -- null-terminated copy (caller frees), or NULL if str is NULL or */
/*                 memory could not be allocated */
char *substr_extract(const char *str, int start, int end)
{
	size_t substr_len;
	char *substr;

	if(NULL == str)
	{
		return(NULL);
	}

	substr_len = (start < 0 || end <= start) ? 0 : (size_t) (end - start);

	/* one extra byte for the nullterm; calloc zeroes it for us */
	if(NULL == (substr = calloc(substr_len + 1, sizeof(char))))
	{
		return(NULL);
	}

	if(substr_len > 0)
	{
		memcpy(substr, str + start, substr_len);
	}

	return(substr);
}
/* buftolower -- return a lower-cased copy of a string */
/* bufp -- null-terminated string to copy; it is not modified */
/* return value -- heap-allocated, null-terminated copy (caller frees), or NULL */
/*                 if bufp is NULL or memory could not be allocated */
char *buftolower(char *bufp)
{
	size_t i, len;
	char *nbufp;

	if(NULL == bufp)
	{
		return(NULL);
	}

	len = strlen(bufp);

	/* len + 1 leaves room for the terminator, which calloc has already zeroed */
	if(NULL == (nbufp = calloc(len + 1, sizeof(char))))
	{
		return(NULL);
	}

	for(i = 0; i < len; i++)
	{
		/* tolower is only defined for unsigned char values and EOF */
		nbufp[i] = (char) tolower((unsigned char) bufp[i]);
	}

	return(nbufp);
}

View File

@ -35,5 +35,6 @@ typedef struct
char *reqgen(uri *urip);
void throw(int errcode);
int resp_parse(char *data, uri *uristruct);
char *substr_extract(const char *str, int start, int end);
char *buftolower(char *bufp);

View File

@ -2,59 +2,62 @@
/* really hate using the preprocessor, but it makes sense in this context */
/* SUBSTR_COUNT is number of parenthetical substrings in REGEX_URI plus one */
/* (ten groups, plus slot zero for the whole match). */
/* this regex is the URI-splitting expression from RFC 3986, appendix B, */
/* with one extra group wrapped around path, query, and fragment so that */
/* PATH yields everything after the authority in a single piece. */
/* it's a little too general for my tastes, but the one I came up with was */
/* trash (unsurprisingly) so here we are. */
/* need to modify this in the future to be less liberal... */
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"

/* indices into the regmatch_t array filled in by regexec */
#define SUBSTR_COUNT 11
#define PROTO 2
#define FQDN 4
#define PATH 5
/* uri_parse -- split a URI string into protocol, host, and path */
/* uristr -- null-terminated URI, e.g. "https://example.com/index.html" */
/* res -- receives heap-allocated, null-terminated proto, fqdn, and path strings */
/*        (caller frees); all three are left NULL on failure */
/* return value -- 0 on success, 1 on failure: the expression did not compile or */
/*                 match, the URI has no protocol or host, or memory ran out */
int uri_parse(const char *uristr, uri *res)
{
	int regerrcode;
	regex_t regexp;
	regmatch_t match[SUBSTR_COUNT];
	char errbuf[BUFSIZ] = {0};

	if(NULL == uristr || NULL == res)
	{
		return(1);
	}

	res->proto = NULL;
	res->fqdn = NULL;
	res->path = NULL;

	if( (regerrcode = regcomp(&regexp, REGEX_URI, REG_EXTENDED)))
	{
		regerror(regerrcode, &regexp, errbuf, BUFSIZ);
		fprintf(stderr, "regular expression error: %s\n", errbuf);
		return(1);
	}

	/* any non-zero result is a failure, not just REG_NOMATCH */
	regerrcode = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0);

	/* match[] holds plain offsets into uristr, so the compiled expression */
	/* can be released before we start copying substrings out */
	regfree(&regexp);

	if(regerrcode)
	{
		return(1);
	}

	/* both groups are optional in the expression; an unmatched group is */
	/* reported with an offset of -1, and everything downstream needs a */
	/* protocol and a non-empty host */
	if(match[PROTO].rm_so < 0 || match[FQDN].rm_so < 0 || match[FQDN].rm_eo <= match[FQDN].rm_so)
	{
		return(1);
	}

	res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
	res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);

	/* if the difference below is less than 1, our path doesn't exist. */
	/* Compensate by setting it to '/' which will always return a root */
	/* document from an HTTP server -- and, presumably, others. */
	/* The fallback is copied to the heap so path has the same ownership */
	/* as the other two members. */
	if((match[PATH].rm_eo - match[PATH].rm_so) < 1)
	{
		res->path = substr_extract("/", 0, 1);
	}
	else
	{
		res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
	}

	if(NULL == res->proto || NULL == res->fqdn || NULL == res->path)
	{
		free(res->proto);
		free(res->fqdn);
		free(res->path);
		res->proto = NULL;
		res->fqdn = NULL;
		res->path = NULL;
		return(1);
	}

	return(0);
}