See CHANGES.
This commit is contained in:
parent
b5e1a30092
commit
b35d49cc28
6
CHANGES
6
CHANGES
@ -1,3 +1,9 @@
|
||||
2023/02/19 - v0.3.2
|
||||
-Improved URI parsing again
|
||||
-Improved HTTP redirect handling capability
|
||||
-Moved TLS out of dial and into main
|
||||
-Improved debugging experience by shuffling around if(param[V])s
|
||||
|
||||
2022/12/13 - v0.3.1b
|
||||
-Fixed Gopher support
|
||||
-Improved URI parsing
|
||||
|
4
README
4
README
@ -20,7 +20,7 @@ Right now, it retrieves data over HTTP or Gopher, and can support TLS,
|
||||
at least when it comes to HTTPS. I'd like a more general approach to
|
||||
TLS support in the future.
|
||||
|
||||
Its source code is small (< 400 lines of C total according to cloc right
|
||||
Its source code is small (< 500 lines of C total according to cloc right
|
||||
now) and uses only POSIX routines -- there's no temptation to use GNU
|
||||
or BSD features because I develop against musl and tcc, and I don't even
|
||||
have the Linux manpages on any of my systems.
|
||||
@ -36,7 +36,7 @@ Apportate also aims to have actually useful diagnostics; that is,
|
||||
compared to other tools, apportate aims to only provide useful error
|
||||
output. In the case of success, it follows the Rule of Silence; on
|
||||
unrecoverable errors, it aborts immediately. It supports multiple levels
|
||||
of verbosity, and exposes almost all of its internals.
|
||||
of verbosity, and exposes almost all of its internal operations for debugging.
|
||||
|
||||
Its simple design and use should also make it relatively convenient
|
||||
for inclusion in shell scripts.
|
||||
|
@ -1,19 +1,18 @@
|
||||
#include "headers.h"
|
||||
|
||||
|
||||
int dial(const char *fqdn, const char *proto, struct tls **tlsres)
|
||||
int dial(const char *fqdn, const char *proto)
|
||||
{
|
||||
int sd;
|
||||
struct addrinfo *ainfo;
|
||||
struct tls_config *tlshints;
|
||||
|
||||
|
||||
if(getaddrinfo(fqdn, proto, 0, &ainfo))
|
||||
if( !(sd = socket(AF_INET, SOCK_STREAM, 0)))
|
||||
{
|
||||
return(0);
|
||||
}
|
||||
|
||||
if( !(sd = socket(ainfo->ai_family, SOCK_STREAM, 0)))
|
||||
if(getaddrinfo(fqdn, proto, 0, &ainfo))
|
||||
{
|
||||
return(0);
|
||||
}
|
||||
@ -23,34 +22,5 @@ int dial(const char *fqdn, const char *proto, struct tls **tlsres)
|
||||
return(0);
|
||||
}
|
||||
|
||||
if(tlsres != 0)
|
||||
{
|
||||
close(sd);
|
||||
|
||||
if( 0 == (*tlsres = tls_client()))
|
||||
{
|
||||
goto err_ssl;
|
||||
}
|
||||
|
||||
if( 0 == (tlshints = tls_config_new()))
|
||||
{
|
||||
goto err_ssl;
|
||||
}
|
||||
|
||||
if(tls_configure(*tlsres, tlshints))
|
||||
{
|
||||
goto err_ssl;
|
||||
}
|
||||
|
||||
|
||||
if( (tls_connect(*tlsres, fqdn, proto)))
|
||||
{
|
||||
goto err_ssl;
|
||||
}
|
||||
}
|
||||
|
||||
return(sd);
|
||||
|
||||
err_ssl:
|
||||
return(0);
|
||||
}
|
||||
|
@ -8,4 +8,4 @@
|
||||
/* sd -- successful connection returns a file descriptor connected to the fqdn */
|
||||
/* ERRCONN -- couldn't connect */
|
||||
/* ERRADDR -- couldn't get addrinfo */
|
||||
int dial(const char *fqdn, const char *proto, struct tls **tls_res);
|
||||
int dial(const char *fqdn, const char *proto);
|
||||
|
57
src/http.c
57
src/http.c
@ -41,32 +41,51 @@ int resp_parse_http(char *data)
|
||||
return(0);
|
||||
}
|
||||
|
||||
char *http_header_extract(char *key, char *data)
|
||||
/* http_header_extract - return the value corresponding to a key if it exists in an http response, otherwise null */
|
||||
/* key - key to extract corresponding value */
|
||||
/* data - response header from which to extract key's value */
|
||||
char *http_get_keyval(char *key, char *data)
|
||||
{
|
||||
char *keyp, *keyp_end;
|
||||
char *returnp;
|
||||
char *buf;
|
||||
/* data indices */
|
||||
char *d_ind, *d_ind2;
|
||||
|
||||
if( NULL == (keyp = strstr(data, key)))
|
||||
{
|
||||
goto err;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(; 0 != *keyp && *keyp != ':'; keyp++);
|
||||
for(; 0 != *keyp && !isalnum(*keyp); keyp++);
|
||||
for(keyp_end = keyp; 0 != *keyp_end && '\r' != *keyp_end; keyp_end++);
|
||||
buf = NULL;
|
||||
d_ind = d_ind2 = data;
|
||||
|
||||
if( NULL == (returnp = calloc((int) (keyp_end - keyp), sizeof(char))))
|
||||
|
||||
/* we ensure that our key and each key we compare to are lower-case because some */
|
||||
/* servers will return mixed-case keys and others single-case. by doing this we */
|
||||
/* can use full key specifiers, which avoids false matches that would occur as a */
|
||||
/* result of using partial key specifiers, e.g. matching on "ocation" instead of "location". */
|
||||
key = buftolower(key);
|
||||
|
||||
|
||||
for(;*data != '\0'; data++)
|
||||
{
|
||||
goto err;
|
||||
if(NULL != buf)
|
||||
{
|
||||
free(buf);
|
||||
buf = NULL;
|
||||
|
||||
for(; '\n' != *(data - 1); data++);
|
||||
d_ind = d_ind2 = data;
|
||||
}
|
||||
|
||||
memcpy(returnp, keyp, (keyp_end - keyp));
|
||||
for(; *d_ind != ':'; d_ind++);
|
||||
buf = substr_extract(data, 0, (d_ind - data));
|
||||
|
||||
buf = buftolower(buf);
|
||||
|
||||
if(!strcmp(key, buf))
|
||||
{
|
||||
free(buf);
|
||||
for(data = d_ind+2; *(1+d_ind) != '\n'; d_ind++);
|
||||
|
||||
buf = substr_extract(data, 0, (d_ind - data));
|
||||
return(buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return(returnp);
|
||||
|
||||
err:
|
||||
return(NULL);
|
||||
}
|
||||
|
@ -16,4 +16,4 @@ int resp_parse_http(char *data);
|
||||
exit();
|
||||
}
|
||||
*/
|
||||
char *http_header_extract(char *key, char *data);
|
||||
char *http_get_keyval(char *key, char *data);
|
||||
|
104
src/main.c
104
src/main.c
@ -36,7 +36,7 @@ char *outpath;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i, translen, redirnum;
|
||||
int i, translen, redirnum, gotheader;
|
||||
int sockd;
|
||||
char *recvbufp;
|
||||
char *sendbufp, *offsetp, *errstr;
|
||||
@ -60,6 +60,7 @@ int main(int argc, char **argv)
|
||||
{
|
||||
goto usage;
|
||||
}
|
||||
|
||||
for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
|
||||
{
|
||||
switch(i)
|
||||
@ -126,6 +127,9 @@ int main(int argc, char **argv)
|
||||
|
||||
/* todo: init uristruct as well */
|
||||
|
||||
uristruct.proto = NULL;
|
||||
uristruct.fqdn = NULL;
|
||||
uristruct.path = NULL;
|
||||
|
||||
if( uri_parse(urip, &uristruct))
|
||||
{
|
||||
@ -133,18 +137,26 @@ int main(int argc, char **argv)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if(param[V])
|
||||
if(param[V] >= 2)
|
||||
{
|
||||
fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
|
||||
|
||||
if(param[V] >= 3)
|
||||
{
|
||||
fprintf(stderr, "length of proto: %lu\nlength of fqdn: %lu\nlength of path: %lu\n", strlen(uristruct.proto), strlen(uristruct.fqdn), strlen(uristruct.path));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sendbufp = reqgen(&uristruct);
|
||||
|
||||
|
||||
|
||||
/* we should probably modify uri_parse to return a zero value on failure... */
|
||||
/* make errstr display the URI -- need mastrcat? */
|
||||
if( NULL == (sendbufp = reqgen(&uristruct)))
|
||||
{
|
||||
errstr = "couldn't generate request. Unknown protocol?";
|
||||
goto err;
|
||||
fprintf(stderr, "couldn't generate request. Unknown protocol: %s?\n", uristruct.proto);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* if outpath isn't set because we haven't received a -o param, */
|
||||
@ -158,57 +170,73 @@ int main(int argc, char **argv)
|
||||
outpath = "default";
|
||||
}
|
||||
|
||||
/* todo: handle TLS a little better. use a global variable, move TLS handling out of dial? */
|
||||
if(param[V])
|
||||
{
|
||||
fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
|
||||
}
|
||||
|
||||
/* definitely going to break tls out of dial, this is really bad */
|
||||
if(!strcmp("https", uristruct.proto))
|
||||
{
|
||||
if(!dial(uristruct.fqdn, uristruct.proto, &tlsc))
|
||||
{
|
||||
errstr = "failed to connect";
|
||||
goto err;
|
||||
}
|
||||
|
||||
if(param[V])
|
||||
{
|
||||
fprintf(stderr, "setting up TLS...\n");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
|
||||
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto, NULL)))
|
||||
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto)))
|
||||
{
|
||||
errstr = "failed to connect";
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* todo: upgrade this to a more general mechanism */
|
||||
if(!strncmp("https", uristruct.proto, 5))
|
||||
{
|
||||
if(NULL != tlsc)
|
||||
{
|
||||
tls_reset(tlsc);
|
||||
}
|
||||
|
||||
if(param[V])
|
||||
struct tls_config *config = tls_config_new();
|
||||
tlsc = tls_client();
|
||||
tls_configure(tlsc, config);
|
||||
|
||||
if(-1 == tls_connect_socket(tlsc, sockd, uristruct.fqdn))
|
||||
{
|
||||
fprintf(stderr, "Sending request...\n %s\n", sendbufp);
|
||||
errstr = "failed to upgrade connection to use TLS, aborting\n";
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
if(param[V] >= 2)
|
||||
{
|
||||
fprintf(stderr, "Sending request...\n-----REQUEST START-----\n%s\n-----REQUEST END-----\n", sendbufp);
|
||||
}
|
||||
|
||||
if(NULL != tlsc)
|
||||
{
|
||||
i = tls_write(tlsc, sendbufp, strlen(sendbufp));
|
||||
if(param[V])
|
||||
{
|
||||
fprintf(stderr, "writing over tls...\n");
|
||||
}
|
||||
|
||||
if( -1 == (i = tls_write(tlsc, sendbufp, strlen(sendbufp))))
|
||||
{
|
||||
fprintf(stderr, "libtls internal error: ");
|
||||
errstr = (char *) tls_error(tlsc);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(param[V])
|
||||
{
|
||||
fprintf(stderr, "writing over socket...\n");
|
||||
}
|
||||
|
||||
i = send(sockd, sendbufp, strlen(sendbufp), 0);
|
||||
}
|
||||
|
||||
if(param[V])
|
||||
if(param[V] >= 3)
|
||||
{
|
||||
fprintf(stderr, "sent: %d bytes\n", i);
|
||||
}
|
||||
|
||||
/* actual read loop */
|
||||
int gotheader;
|
||||
|
||||
for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
|
||||
{
|
||||
if(NULL == tlsc)
|
||||
@ -220,7 +248,7 @@ int main(int argc, char **argv)
|
||||
translen = tls_read(tlsc, recvbufp, BUFSIZ);
|
||||
}
|
||||
|
||||
if(param[V])
|
||||
if(param[V] >= 3)
|
||||
{
|
||||
fprintf(stderr, "recv: %d bytes\n", translen);
|
||||
}
|
||||
@ -241,15 +269,19 @@ int main(int argc, char **argv)
|
||||
case 200:
|
||||
/* by now we have the first transmission from the server that we actually care about */
|
||||
/* we just need to get to the end of the headers, now that we're done with 'em */
|
||||
if(param[V])
|
||||
if(param[V] >= 3)
|
||||
{
|
||||
fprintf(stderr, "200 OKAY, moving to end of header...\n");
|
||||
}
|
||||
|
||||
|
||||
for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
|
||||
{
|
||||
if(param[V] >= 2)
|
||||
{
|
||||
fprintf(stderr, "Searching to end of header...\n");
|
||||
}
|
||||
|
||||
if(NULL != tlsc)
|
||||
{
|
||||
tls_read(tlsc, recvbufp, BUFSIZ);
|
||||
@ -270,12 +302,14 @@ int main(int argc, char **argv)
|
||||
/* intentional drop through from 301 to 302 */
|
||||
case 301:
|
||||
case 302:
|
||||
urip = http_header_extract("ocation", recvbufp);
|
||||
urip = http_get_keyval("Location", recvbufp);
|
||||
if(param[V])
|
||||
{
|
||||
fprintf(stderr, "Redirecting to %s%s%s...", uristruct.proto, uristruct.fqdn, uristruct.path);
|
||||
fprintf(stderr, "Redirecting to %s...\n", urip);
|
||||
}
|
||||
|
||||
redirnum++;
|
||||
/* could use continue, but structured programming makes it easier to use goto in this circumstance... */
|
||||
goto start;
|
||||
|
||||
case 400:
|
||||
@ -297,10 +331,8 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
|
||||
if(param[V] >= 2)
|
||||
fprintf(stdout, "%s", recvbufp);
|
||||
|
||||
if(param[V])
|
||||
if(param[V] >= 3)
|
||||
{
|
||||
fprintf(stderr, "fwrite: %d bytes\n", i);
|
||||
}
|
||||
@ -308,6 +340,7 @@ int main(int argc, char **argv)
|
||||
offsetp = recvbufp;
|
||||
}
|
||||
|
||||
tls_free(tlsc);
|
||||
close(sockd);
|
||||
fclose(filed);
|
||||
free(sendbufp);
|
||||
@ -327,3 +360,4 @@ int main(int argc, char **argv)
|
||||
fprintf(stderr, "%s: %s\n", argv[0], errstr);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,6 @@
|
||||
char *reqgen(uri *urip)
|
||||
{
|
||||
char *req;
|
||||
int is_tls = 0;
|
||||
|
||||
if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
|
||||
{
|
||||
@ -59,7 +58,7 @@ char *substr_extract(const char *str, int start, int end)
|
||||
substr = NULL;
|
||||
|
||||
/* account for zero index plus the nullterm */
|
||||
if( !(substr = malloc((substr_len + 1))))
|
||||
if( !(substr = calloc((substr_len + 1), sizeof(char))))
|
||||
{
|
||||
return(NULL);
|
||||
}
|
||||
@ -69,25 +68,16 @@ char *substr_extract(const char *str, int start, int end)
|
||||
return(substr);
|
||||
}
|
||||
|
||||
/* mastrcat -- improved string concat function. returns a pointer to the first element in a buffer containing the strings str1 */
|
||||
/* and str2 joined end-to-end. */
|
||||
char *mastrcat(char *str1, char *str2)
|
||||
char *buftolower(char *bufp)
|
||||
{
|
||||
unsigned long int nbi, stri, nbsize;
|
||||
char *nbuf;
|
||||
nbi = stri = 0;
|
||||
nbsize = (strlen(str1) + strlen(str2));
|
||||
nbuf = malloc(nbsize);
|
||||
int i;
|
||||
char *nbufp;
|
||||
nbufp = calloc(strlen(bufp), sizeof(char));
|
||||
|
||||
for(stri = 0; str1[stri] != '\0'; nbi++, stri++)
|
||||
for(i = 0; '\0' != bufp[i]; i++)
|
||||
{
|
||||
nbuf[nbi] = str1[stri];
|
||||
nbufp[i] = tolower(bufp[i]);
|
||||
}
|
||||
|
||||
for(stri = 0; str2[stri] != '\0'; nbi++, stri++)
|
||||
{
|
||||
nbuf[nbi] = str2[stri];
|
||||
}
|
||||
|
||||
return nbuf;
|
||||
return(nbufp);
|
||||
}
|
||||
|
@ -37,4 +37,4 @@ typedef struct
|
||||
char *reqgen(uri *urip);
|
||||
int resp_parse(char *data, uri *uristruct);
|
||||
char *substr_extract(const char *str, int start, int end);
|
||||
char *mastrcat(char *str1, char *str2);
|
||||
char *buftolower(char *bufp);
|
||||
|
21
src/uri.c
21
src/uri.c
@ -2,14 +2,6 @@
|
||||
|
||||
|
||||
/* really hate using the preprocessor, but it makes sense in this context */
|
||||
/* SUBSTR_COUNT is number of parenthetical substrings in REGEX_URI plus one */
|
||||
/* this regex is from the RFC describing URI syntax -- can't recall the */
|
||||
/* exact one right now. anyway, it's a little too general for my tastes, */
|
||||
/* but the one I came up with was trash (unsurprisingly) so here we are. */
|
||||
/* need to modify this in the future to be less liberal... */
|
||||
#define REGEX_URI_OLD "^([^:/?#]+)://(([^/?#]+)+([^?#]*))(\\?([^#]*))?(#(.*))?"
|
||||
#define REGEX_URI_RFC "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"
|
||||
#define REGEX_URI_NEW "^(([^:/?#]+):)?(//([^/?#]+))?([^?#]*)(\\?([^#]*))?(#(.*))?"
|
||||
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
|
||||
|
||||
|
||||
@ -21,10 +13,8 @@
|
||||
|
||||
int uri_parse(const char *uristr, uri *res)
|
||||
{
|
||||
int i;
|
||||
int regerrcode;
|
||||
char validp;
|
||||
char *pathp;
|
||||
regex_t regexp;
|
||||
regmatch_t match[SUBSTR_COUNT+5];
|
||||
char errbuf[BUFSIZ] = {0};
|
||||
@ -49,6 +39,7 @@ int uri_parse(const char *uristr, uri *res)
|
||||
/* with experience. */
|
||||
res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
|
||||
res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);
|
||||
|
||||
/* if the difference below is less than 1, our path doesn't exist. */
|
||||
/* Compensate by setting it to '/' which will always return a root */
|
||||
/* document from an HTTP server -- and, presumably, others. We'll */
|
||||
@ -61,16 +52,6 @@ int uri_parse(const char *uristr, uri *res)
|
||||
{
|
||||
/* we only have a simple path */
|
||||
res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
|
||||
/* /\* we have a more complex path *\/ */
|
||||
/* if(0 != match[PATH+1].rm_so) */
|
||||
/* { */
|
||||
/* for(i = PATH; 0 != match[i].rm_so && i <= SUBSTR_COUNT; i++) */
|
||||
/* { */
|
||||
/* /\* memory leak here that needs to be addressed *\/ */
|
||||
/* res->path = mastrcat(res->path, substr_extract(uristr, match[i].rm_so, match[i].rm_eo)); */
|
||||
/* } */
|
||||
/* } */
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user