See CHANGES.

This commit is contained in:
Mid Favila 2023-02-19 16:44:19 -04:00
parent b5e1a30092
commit b35d49cc28
10 changed files with 184 additions and 184 deletions

View File

@ -1,3 +1,9 @@
2023/02/19 - v0.3.2
-Improved URI parsing again
-Improved HTTP redirect handling capability
-Moved TLS out of dial and into main
-Improved debugging experience by shuffling around if(param[V])s
2022/12/13 - v0.3.1b
-Fixed Gopher support
-Improved URI parsing

4
README
View File

@ -20,7 +20,7 @@ Right now, it retrieves data over HTTP or Gopher, and can support TLS,
at least when it comes to HTTPS. I'd like a more general approach to
TLS support in the future.
Its source code is small (< 400 lines of C total according to cloc right
Its source code is small (< 500 lines of C total according to cloc right
now) and uses only POSIX routines -- there's no temptation to use GNU
or BSD features because I develop against musl and tcc, and I don't even
have the Linux manpages on any of my systems.
@ -36,7 +36,7 @@ Apportate also aims to have actually useful diagnostics; that is,
compared to other tools, apportate aims to only provide useful error
output. In the case of success, it follows the Rule of Silence; on
unrecoverable errors, it aborts immediately. It supports multiple levels
of verbosity, and exposes almost all of its internals.
of verbosity, and exposes almost all of its internal operations for debugging.
Its simple design and use should also make it relatively convenient
for inclusion in shell scripts.

View File

@ -1,19 +1,18 @@
#include "headers.h"
int dial(const char *fqdn, const char *proto, struct tls **tlsres)
int dial(const char *fqdn, const char *proto)
{
int sd;
struct addrinfo *ainfo;
struct tls_config *tlshints;
if(getaddrinfo(fqdn, proto, 0, &ainfo))
if( !(sd = socket(AF_INET, SOCK_STREAM, 0)))
{
return(0);
}
if( !(sd = socket(ainfo->ai_family, SOCK_STREAM, 0)))
if(getaddrinfo(fqdn, proto, 0, &ainfo))
{
return(0);
}
@ -23,34 +22,5 @@ int dial(const char *fqdn, const char *proto, struct tls **tlsres)
return(0);
}
if(tlsres != 0)
{
close(sd);
if( 0 == (*tlsres = tls_client()))
{
goto err_ssl;
}
if( 0 == (tlshints = tls_config_new()))
{
goto err_ssl;
}
if(tls_configure(*tlsres, tlshints))
{
goto err_ssl;
}
if( (tls_connect(*tlsres, fqdn, proto)))
{
goto err_ssl;
}
}
return(sd);
err_ssl:
return(0);
}

View File

@ -8,4 +8,4 @@
/* sd -- successful connection returns a file descriptor connected to the fqdn */
/* ERRCONN -- couldn't connect */
/* ERRADDR -- couldn't get addrinfo */
int dial(const char *fqdn, const char *proto, struct tls **tls_res);
int dial(const char *fqdn, const char *proto);

View File

@ -1,27 +1,27 @@
#include "headers.h"
const char REQ_HTTP[] =
{
"GET %s HTTP/1.0\r\nHost: %s\r\n\r\n"
};
{
"GET %s HTTP/1.0\r\nHost: %s\r\n\r\n"
};
int reqgen_http(const char *path, const char *fqdn, char **nbuf)
{
int buflen;
{
int buflen;
buflen = (strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1);
buflen = (strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1);
if( !(*nbuf = calloc(buflen, sizeof(char))))
{
return(ERRMEM);
}
if( !(*nbuf = calloc(buflen, sizeof(char))))
{
return(ERRMEM);
}
sprintf(*nbuf, REQ_HTTP, path, fqdn);
sprintf(*nbuf, REQ_HTTP, path, fqdn);
return(0);
}
return(0);
}
int resp_parse_http(char *data)
{
@ -41,32 +41,51 @@ int resp_parse_http(char *data)
return(0);
}
char *http_header_extract(char *key, char *data)
/* http_get_keyval - return the value corresponding to a key if it exists in an http response, otherwise null */
/* key - key to extract corresponding value */
/* data - response header from which to extract key's value */
char *http_get_keyval(char *key, char *data)
{
char *keyp, *keyp_end;
char *returnp;
char *buf;
/* data indices */
char *d_ind, *d_ind2;
if( NULL == (keyp = strstr(data, key)))
{
goto err;
}
else
{
for(; 0 != *keyp && *keyp != ':'; keyp++);
for(; 0 != *keyp && !isalnum(*keyp); keyp++);
for(keyp_end = keyp; 0 != *keyp_end && '\r' != *keyp_end; keyp_end++);
buf = NULL;
d_ind = d_ind2 = data;
if( NULL == (returnp = calloc((int) (keyp_end - keyp), sizeof(char))))
/* we ensure that our key and each key we compare to are lower-case because some */
/* servers will return mixed-case keys and others single-case. by doing this we */
/* can use full key specifiers, which avoids false matches that would occur as a */
/* result of using partial key specifiers, e.g. "ocation" instead of "location". */
key = buftolower(key);
for(;*data != '\0'; data++)
{
if(NULL != buf)
{
goto err;
free(buf);
buf = NULL;
for(; '\n' != *(data - 1); data++);
d_ind = d_ind2 = data;
}
memcpy(returnp, keyp, (keyp_end - keyp));
for(; *d_ind != ':'; d_ind++);
buf = substr_extract(data, 0, (d_ind - data));
buf = buftolower(buf);
if(!strcmp(key, buf))
{
free(buf);
for(data = d_ind+2; *(1+d_ind) != '\n'; d_ind++);
buf = substr_extract(data, 0, (d_ind - data));
return(buf);
}
}
return(returnp);
err:
return(NULL);
}

View File

@ -16,4 +16,4 @@ int resp_parse_http(char *data);
exit();
}
*/
char *http_header_extract(char *key, char *data);
char *http_get_keyval(char *key, char *data);

View File

@ -36,7 +36,7 @@ char *outpath;
int main(int argc, char **argv)
{
int i, translen, redirnum;
int i, translen, redirnum, gotheader;
int sockd;
char *recvbufp;
char *sendbufp, *offsetp, *errstr;
@ -60,6 +60,7 @@ int main(int argc, char **argv)
{
goto usage;
}
for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
{
switch(i)
@ -80,7 +81,7 @@ int main(int argc, char **argv)
break;
case 'v':
/* we handle v differently because we want to support different levels of verbosity */
/* we handle v differently because we want to support different levels of verbosity */
if(!param[Q])
{
param[V]++;
@ -88,7 +89,7 @@ int main(int argc, char **argv)
break;
default:
goto usage;
goto usage;
}
}
urip = argv[optind];
@ -125,26 +126,37 @@ int main(int argc, char **argv)
}
/* todo: init uristruct as well */
uristruct.proto = NULL;
uristruct.fqdn = NULL;
uristruct.path = NULL;
if( uri_parse(urip, &uristruct))
{
errstr = "couldn't parse URI.";
goto err;
}
{
errstr = "couldn't parse URI.";
goto err;
}
if(param[V])
if(param[V] >= 2)
{
fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
if(param[V] >= 3)
{
fprintf(stderr, "length of proto: %lu\nlength of fqdn: %lu\nlength of path: %lu\n", strlen(uristruct.proto), strlen(uristruct.fqdn), strlen(uristruct.path));
}
}
sendbufp = reqgen(&uristruct);
/* we should probably modify uri_parse to return a zero value on failure... */
/* make errstr display the URI -- need mastrcat? */
if( NULL == (sendbufp = reqgen(&uristruct)))
{
errstr = "couldn't generate request. Unknown protocol?";
goto err;
fprintf(stderr, "couldn't generate request. Unknown protocol: %s?\n", uristruct.proto);
exit(-1);
}
/* if outpath isn't set because we haven't received a -o param, */
@ -158,57 +170,73 @@ int main(int argc, char **argv)
outpath = "default";
}
/* todo: handle TLS a little better. use a global variable, move TLS handling out of dial? */
if(param[V])
{
fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
}
/* definitely going to break tls out of dial, this is really bad */
if(!strcmp("https", uristruct.proto))
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto)))
{
if(!dial(uristruct.fqdn, uristruct.proto, &tlsc))
errstr = "failed to connect";
goto err;
}
/* todo: upgrade this to a more general mechanism */
if(!strncmp("https", uristruct.proto, 5))
{
if(NULL != tlsc)
{
errstr = "failed to connect";
goto err;
tls_reset(tlsc);
}
if(param[V])
struct tls_config *config = tls_config_new();
tlsc = tls_client();
tls_configure(tlsc, config);
if(-1 == tls_connect_socket(tlsc, sockd, uristruct.fqdn))
{
fprintf(stderr, "setting up TLS...\n");
}
}
else
{
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto, NULL)))
{
errstr = "failed to connect";
errstr = "failed to upgrade connection to use TLS, aborting\n";
goto err;
}
}
if(param[V])
if(param[V] >= 2)
{
fprintf(stderr, "Sending request...\n %s\n", sendbufp);
fprintf(stderr, "Sending request...\n-----REQUEST START-----\n%s\n-----REQUEST END-----\n", sendbufp);
}
if(NULL != tlsc)
{
i = tls_write(tlsc, sendbufp, strlen(sendbufp));
if(param[V])
{
fprintf(stderr, "writing over tls...\n");
}
if( -1 == (i = tls_write(tlsc, sendbufp, strlen(sendbufp))))
{
fprintf(stderr, "libtls internal error: ");
errstr = (char *) tls_error(tlsc);
goto err;
}
}
else
{
if(param[V])
{
fprintf(stderr, "writing over socket...\n");
}
i = send(sockd, sendbufp, strlen(sendbufp), 0);
}
if(param[V])
if(param[V] >= 3)
{
fprintf(stderr, "sent: %d bytes\n", i);
}
/* actual read loop */
int gotheader;
for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
{
if(NULL == tlsc)
@ -220,7 +248,7 @@ int main(int argc, char **argv)
translen = tls_read(tlsc, recvbufp, BUFSIZ);
}
if(param[V])
if(param[V] >= 3)
{
fprintf(stderr, "recv: %d bytes\n", translen);
}
@ -241,7 +269,7 @@ int main(int argc, char **argv)
case 200:
/* by now we have the first transmission from the server that we actually care about */
/* we just need to get to the end of the headers, now that we're done with 'em */
if(param[V])
if(param[V] >= 3)
{
fprintf(stderr, "200 OKAY, moving to end of header...\n");
}
@ -249,7 +277,11 @@ int main(int argc, char **argv)
for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
{
fprintf(stderr, "Searching to end of header...\n");
if(param[V] >= 2)
{
fprintf(stderr, "Searching to end of header...\n");
}
if(NULL != tlsc)
{
tls_read(tlsc, recvbufp, BUFSIZ);
@ -270,12 +302,14 @@ int main(int argc, char **argv)
/* intentional drop through from 301 to 302 */
case 301:
case 302:
urip = http_header_extract("ocation", recvbufp);
urip = http_get_keyval("Location", recvbufp);
if(param[V])
{
fprintf(stderr, "Redirecting to %s%s%s...", uristruct.proto, uristruct.fqdn, uristruct.path);
fprintf(stderr, "Redirecting to %s...\n", urip);
}
redirnum++;
/* could use continue, but structured programming makes it easier to use goto in this circumstance... */
goto start;
case 400:
@ -297,10 +331,8 @@ int main(int argc, char **argv)
}
i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
if(param[V] >= 2)
fprintf(stdout, "%s", recvbufp);
if(param[V])
if(param[V] >= 3)
{
fprintf(stderr, "fwrite: %d bytes\n", i);
}
@ -308,6 +340,7 @@ int main(int argc, char **argv)
offsetp = recvbufp;
}
tls_free(tlsc);
close(sockd);
fclose(filed);
free(sendbufp);
@ -327,3 +360,4 @@ int main(int argc, char **argv)
fprintf(stderr, "%s: %s\n", argv[0], errstr);
exit(EXIT_FAILURE);
}

View File

@ -2,35 +2,34 @@
/* return a properly formatted request for any implemented protocol */
char *reqgen(uri *urip)
{
char *req;
int is_tls = 0;
if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
{
reqgen_http(urip->path, urip->fqdn, &req);
char *req;
if(!req)
{
return(NULL);
}
if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
{
reqgen_http(urip->path, urip->fqdn, &req);
return(req);
if(!req)
{
return(NULL);
}
return(req);
}
else if(!strcmp(urip->proto, "gopher"))
{
reqgen_gopher(urip->path, &req);
if(!req)
{
return(NULL);
}
return(req);
}
return(NULL);
}
else if(!strcmp(urip->proto, "gopher"))
{
reqgen_gopher(urip->path, &req);
if(!req)
{
return(NULL);
}
return(req);
}
return(NULL);
}
/* takes a data buffer and returns an integer corresponding to the server's response value */
/* if not applicable, return -1 */
@ -46,7 +45,7 @@ int resp_parse(char *data, uri *uristruct)
return(-1);
}
}
}
/* return a pointer to a character array on the heap consisting of all bytes */
/* between start and end in str. */
@ -59,35 +58,26 @@ char *substr_extract(const char *str, int start, int end)
substr = NULL;
/* account for zero index plus the nullterm */
if( !(substr = malloc((substr_len + 1))))
if( !(substr = calloc((substr_len + 1), sizeof(char))))
{
return(NULL);
}
memcpy(substr, str+start, substr_len);
memcpy(substr, str+start, substr_len);
return(substr);
}
/* mastrcat -- improved string concat function. returns a pointer to the first element in a buffer containing the strings str1 */
/* and str2 joined end-to-end. */
char *mastrcat(char *str1, char *str2)
{
unsigned long int nbi, stri, nbsize;
char *nbuf;
nbi = stri = 0;
nbsize = (strlen(str1) + strlen(str2));
nbuf = malloc(nbsize);
for(stri = 0; str1[stri] != '\0'; nbi++, stri++)
{
nbuf[nbi] = str1[stri];
}
for(stri = 0; str2[stri] != '\0'; nbi++, stri++)
{
nbuf[nbi] = str2[stri];
}
return nbuf;
return(substr);
}
/* buftolower - return a newly allocated, NUL-terminated, lower-cased copy of bufp. */
/* bufp - NUL-terminated string to copy; it is not modified. */
/* returns NULL if bufp is NULL or if the allocation fails. the caller owns the */
/* returned buffer and is responsible for free()ing it. */
char *buftolower(char *bufp)
{
    size_t i, len;
    char *nbufp;

    if(NULL == bufp)
    {
        return(NULL);
    }

    len = strlen(bufp);

    /* +1 for the terminator; calloc zero-fills, so the copy is always a valid string. */
    /* without it, anything that strcmp()s the result reads past the allocation. */
    if( NULL == (nbufp = calloc(len + 1, sizeof(char))))
    {
        return(NULL);
    }

    for(i = 0; i < len; i++)
    {
        /* tolower() is only defined for values representable as unsigned char (or EOF) */
        nbufp[i] = (char) tolower((unsigned char) bufp[i]);
    }

    return(nbufp);
}

View File

@ -37,4 +37,4 @@ typedef struct
char *reqgen(uri *urip);
int resp_parse(char *data, uri *uristruct);
char *substr_extract(const char *str, int start, int end);
char *mastrcat(char *str1, char *str2);
char *buftolower(char *bufp);

View File

@ -2,14 +2,6 @@
/* really hate using the preprocessor, but it makes sense in this context */
/* SUBSTR_COUNT is number of parenthetical substrings in REGEX_URI plus one */
/* this regex is from the RFC describing URI syntax -- can't recall the */
/* exact one right now. anyway, it's a little too general for my tastes, */
/* but the one I came up with was trash (unsurprisingly) so here we are. */
/* need to modify this in the future to be less liberal... */
#define REGEX_URI_OLD "^([^:/?#]+)://(([^/?#]+)+([^?#]*))(\\?([^#]*))?(#(.*))?"
#define REGEX_URI_RFC "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"
#define REGEX_URI_NEW "^(([^:/?#]+):)?(//([^/?#]+))?([^?#]*)(\\?([^#]*))?(#(.*))?"
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
@ -21,10 +13,8 @@
int uri_parse(const char *uristr, uri *res)
{
int i;
int regerrcode;
char validp;
char *pathp;
regex_t regexp;
regmatch_t match[SUBSTR_COUNT+5];
char errbuf[BUFSIZ] = {0};
@ -49,6 +39,7 @@ int uri_parse(const char *uristr, uri *res)
/* with experience. */
res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);
/* if the difference below is less than 1, our path doesn't exist. */
/* Compensate by setting it to '/' which will always return a root */
/* document from an HTTP server -- and, presumably, others. We'll */
@ -61,16 +52,6 @@ int uri_parse(const char *uristr, uri *res)
{
/* we only have a simple path */
res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
/* /\* we have a more complex path *\/ */
/* if(0 != match[PATH+1].rm_so) */
/* { */
/* for(i = PATH; 0 != match[i].rm_so && i <= SUBSTR_COUNT; i++) */
/* { */
/* /\* memory leak here that needs to be addressed *\/ */
/* res->path = mastrcat(res->path, substr_extract(uristr, match[i].rm_so, match[i].rm_eo)); */
/* } */
/* } */
}