See CHANGES.

This commit is contained in:
Mid Favila 2022-12-13 13:12:07 -04:00
parent 61415b6069
commit 8bba6e9f60
9 changed files with 435 additions and 185 deletions

15
CHANGES
View File

@ -1,3 +1,16 @@
2022/12/13 - v0.3.1b
-Fixed Gopher support
-Improved URI parsing
2022/12/12 - v0.3b
-Crashes in new, exciting ways!
-I couldn't get it to segfault, but ymmv.
-Has TLS still. It's less trash this time but still not great.
-Still smaller than hurl, still more readable.
-Still has Gopher, but the new main() breaks support. Will fix.
-Apparently doesn't work on BSD. Will get around to this.
-<<<!!!NEW!!!>>> Has HTTP redirect capabilities. Woaoaoaoao.
2022/12/7 - v0.2b 2022/12/7 - v0.2b
-It did segfault. Segfaults less now. -It did segfault. Segfaults less now.
-Still has TLS. -Still has TLS.
@ -10,4 +23,4 @@
Features: Features:
-Doesn't segfault. Probably. -Doesn't segfault. Probably.
-Has TLS. -Has TLS.
-It's smaller and more readable than hurl. Nyaaah. -It's smaller and more readable than hurl. Nyaaah

View File

@ -1,6 +1,8 @@
.POSIX: .POSIX:
LIBS = -ltls SSL = bearssl
LIBS = -ltls -l$(SSL)
OBJ = \ OBJ = \
main.c \ main.c \

View File

@ -1,5 +1,6 @@
#include "headers.h" #include "headers.h"
int dial(const char *fqdn, const char *proto, struct tls **tlsres) int dial(const char *fqdn, const char *proto, struct tls **tlsres)
{ {
int sd; int sd;
@ -17,7 +18,7 @@ int dial(const char *fqdn, const char *proto, struct tls **tlsres)
return(0); return(0);
} }
if(connect(sd, ainfo->ai_addr, sizeof(struct sockaddr))) if(connect(sd, ainfo->ai_addr, ainfo->ai_addrlen))
{ {
return(0); return(0);
} }

View File

@ -22,3 +22,51 @@ int reqgen_http(const char *path, const char *fqdn, char **nbuf)
return(0); return(0);
} }
/* resp_parse_http -- pull the numeric status code out of an HTTP status line */
/* data -- null-terminated buffer beginning with a status line, e.g. "HTTP/1.1 200 OK" */
/* return value -- the three-digit status code, or 0 if data is NULL, too short, */
/* or does not carry three decimal digits at the expected offset */
int resp_parse_http(char *data)
{
    /* "HTTP/x.y " is nine characters; the status code is the three after that */
    enum
    {
        STATUS_OFFSET = 9,
        STATUS_DIGITS = 3
    };
    int i, code;

    if( NULL == data)
    {
        return(0);
    }
    /* make sure the string really extends through the status code before */
    /* indexing into it; a short read must not send us past the terminator */
    for(i = 0; i < STATUS_OFFSET + STATUS_DIGITS; i++)
    {
        if('\0' == data[i])
        {
            return(0);
        }
    }
    /* convert in place -- no scratch buffer, so nothing to leak */
    for(code = 0, i = 0; i < STATUS_DIGITS; i++)
    {
        char c = data[STATUS_OFFSET + i];

        if(c < '0' || c > '9')
        {
            return(0);
        }
        code = (code * 10) + (c - '0');
    }
    return(code);
}
/* http_header_extract -- return a copy of the value that follows key in a header block */
/* key -- null-terminated text to search for (matched with strstr, so it is */
/* case-sensitive and may match anywhere in data) */
/* data -- null-terminated buffer holding the response header */
/* return value -- NULL on failure (NULL argument, key not found, no ':' after the */
/* key, or out of memory); otherwise a null-terminated heap buffer the caller must free */
char *http_header_extract(char *key, char *data)
{
    char *valp, *valp_end;
    char *returnp;
    size_t vallen;

    if( NULL == key || NULL == data)
    {
        return(NULL);
    }
    if( NULL == (valp = strstr(data, key)))
    {
        return(NULL);
    }
    /* walk forward to the colon separating the field name from its value */
    for(; 0 != *valp && ':' != *valp; valp++);
    if(0 == *valp)
    {
        return(NULL);
    }
    valp++;
    /* skip only the optional blanks after the colon; skipping everything that */
    /* is not alphanumeric would eat the leading '/' of a relative redirect and */
    /* could wander onto the next header line when the value is empty */
    for(; ' ' == *valp || '\t' == *valp; valp++);
    /* the value runs up to the end of the line */
    for(valp_end = valp; 0 != *valp_end && '\r' != *valp_end && '\n' != *valp_end; valp_end++);
    vallen = (size_t) (valp_end - valp);
    /* one extra zeroed byte so the copy is always null-terminated */
    if( NULL == (returnp = calloc(vallen + 1, sizeof(char))))
    {
        return(NULL);
    }
    memcpy(returnp, valp, vallen);
    return(returnp);
}

View File

@ -1,2 +1,19 @@
int reqgen_http(const char *path, const char *fqdn, char **nbuf);int http_content_length(char *data); int reqgen_http(const char *path, const char *fqdn, char **nbuf);int http_content_length(char *data);
char *http_skip_header(char *data, int recvlen); int resp_parse_http(char *data);
/* http_header_extract -- function to return value corresponding to key */
/* key -- null-terminated buffer containing a key to retrieve a value of */
/* data -- null-terminated buffer containing a valid http response header */
/* return value -- NULL on failure, otherwise value corresponding to key in null-terminated buffer */
/* example -- (uri_parse returns non-zero on failure)
if(!uri_parse(http_header_extract("Location", recvbuf), &uri))
{
printf("redirecting to: %s\n", uri.fqdn);
}
else
{
printf("failed to find a valid redirect. bailing out\n");
exit(EXIT_FAILURE);
}
*/
char *http_header_extract(char *key, char *data);

View File

@ -22,6 +22,13 @@ enum
V V
}; };
enum
{
/* change this to alter how many redirect attempts should be tried */
/* todo: make this a command-line parameter? */
REDIR_LIM = 10
};
char param[4] = {0}; char param[4] = {0};
/* below is to accompany param[O] */ /* below is to accompany param[O] */
@ -29,17 +36,18 @@ char *outpath;
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
int i, translen, gotheader; int i, translen, redirnum;
int sockd; int sockd;
char *recvbuf; char *recvbufp;
char *sendbufp, *offsetp, *errstr; char *sendbufp, *offsetp, *errstr;
char *urip;
FILE *filed; FILE *filed;
struct tls *tlsc; struct tls *tlsc;
uri uristruct; uri uristruct;
i = translen = sockd = 0; i = translen = sockd = 0;
sendbufp = offsetp = 0; sendbufp = offsetp = 0;
if( (recvbuf = malloc(BUFSIZ+1)) == NULL) if( (recvbufp = malloc(BUFSIZ+1)) == NULL)
{ {
errstr = "failed to init"; errstr = "failed to init";
goto err; goto err;
@ -48,7 +56,10 @@ int main(int argc, char **argv)
filed = 0; filed = 0;
tlsc = NULL; tlsc = NULL;
if(argc == 1)
{
goto usage;
}
for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0) for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
{ {
switch(i) switch(i)
@ -77,140 +88,242 @@ int main(int argc, char **argv)
break; break;
default: default:
errstr = "[-vb] [-q] [-o <file>] uri";
goto usage; goto usage;
} }
} }
urip = argv[optind];
for(redirnum = 0; redirnum < REDIR_LIM;)
{
start:
if(NULL != tlsc)
{
tls_free(tlsc);
tlsc = NULL;
}
/* I don't care *what* you say, the system should *never* in a sane context return 0, 1, or 2 as a valid file descriptor */
/* libc reserves these */
if(2 < sockd)
{
close(sockd);
sockd = 0;
}
if(filed)
{
fclose(filed);
filed = NULL;
}
if(sendbufp)
{
free(sendbufp);
sendbufp = NULL;
}
/* todo: init uristruct as well */
if( uri_parse(urip, &uristruct))
{
errstr = "couldn't parse URI.";
goto err;
}
if(param[V])
{
fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
}
/* we should probably modify uri_parse to return a zero value on failure... */ /* we should probably modify uri_parse to return a zero value on failure... */
if(uri_parse(argv[optind], &uristruct)) /* make errstr display the URI -- need mastrcat? */
if( NULL == (sendbufp = reqgen(&uristruct)))
{ {
errstr = "[-q|[-v -b]] [-o <file>] uri"; errstr = "couldn't generate request. Unknown protocol?";
goto usage; goto err;
} }
/* if outpath isn't set because we haven't received a -o param, */ /* if outpath isn't set because we haven't received a -o param, */
/* then we should set the outpath to the final component of the */ /* then we should set the outpath to the final component of the */
/* URI. */ /* URI. */
/* If we can't do that, we should just set it to 'default'. */ /* If we can't do that, we should just set it to 'default'. */
/* todo: maybe add a check to ensure we don't overwrite? but if */
/* the user tells us to, who are we to question them? */
if(NULL == outpath && !strlen((outpath = (1 + strrchr(uristruct.path, '/'))))) if(NULL == outpath && !strlen((outpath = (1 + strrchr(uristruct.path, '/')))))
{ {
outpath = "default"; outpath = "default";
} }
/* generate our request */ /* todo: handle TLS a little better. use a global variable, move TLS handling out of dial? */
if( !(sendbufp = reqgen(&uristruct)))
{
errstr = "Unknown protocol.";
goto err;
}
if(param[V]) if(param[V])
{ {
fprintf(stderr, "request: %s\n", sendbufp); fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
} }
/* having a routine or a global variable to track whether we need TLS would be */ /* definitely going to break tls out of dial, this is really bad */
/* nice to add in the future */
/* could probably store whether tls is necessary in a char within uristruct... */
/* Note: at the moment, if the TLS pointer passed is non-NULL, */
/* dial's return code can only be treated as an indicator of success. */
if(!strcmp("https", uristruct.proto)) if(!strcmp("https", uristruct.proto))
{ {
sockd = dial(uristruct.fqdn, uristruct.proto, &tlsc); if(!dial(uristruct.fqdn, uristruct.proto, &tlsc))
}
else
{ {
sockd = dial(uristruct.fqdn, uristruct.proto, NULL); errstr = "failed to connect";
}
if(!sockd)
{
errstr = "Couldn't connect to the host using the specified protocol.";
goto err; goto err;
} }
if(tlsc)
{
translen = tls_write(tlsc, sendbufp, strlen(sendbufp));
}
else
{
translen = send(sockd, sendbufp, strlen(sendbufp), 0);
}
if(translen)
{
if(param[V]) if(param[V])
{ {
printf("send: %d bytes\n", translen); fprintf(stderr, "setting up TLS...\n");
} }
} }
else else
{ {
errstr = "Couldn't transmit data."; /* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto, NULL)))
{
errstr = "failed to connect";
goto err; goto err;
} }
}
/* now for a slightly more complex version of the same routine: until we've encountered the */ if(param[V])
/* delimiter, "\r\n\r\n", don't write to disk. Once we have, calculate the size of the body, */
/* then write that number of bytes to disk starting from the offset. Then, write everything */
/* until end of transmission. */
for(gotheader = 0, offsetp = NULL, translen = 1; translen > 0; memset(recvbuf, 0, BUFSIZ+1))
{ {
if( NULL == tlsc) fprintf(stderr, "Sending request...\n %s\n", sendbufp);
}
if(NULL != tlsc)
{ {
printf("recv: %d bytes\n", translen = recv(sockd, recvbuf, BUFSIZ, 0)); i = tls_write(tlsc, sendbufp, strlen(sendbufp));
} }
else else
{ {
printf("recv: %d bytes\n", translen = tls_read(tlsc, recvbuf, BUFSIZ)); i = send(sockd, sendbufp, strlen(sendbufp), 0);
} }
/* if we haven't gotten the header and our delimiter isn't in the */ if(param[V])
/* received string, we're getting a multipart header and need to */
/* skip over it. */
/* right now we assume HTTP/S -- this is undesirable... */
if( strcmp("gopher", uristruct.proto) && !gotheader)
{ {
if( NULL == (offsetp = strstr(recvbuf, "\r\n\r\n"))) fprintf(stderr, "sent: %d bytes\n", i);
}
/* actual read loop */
int gotheader;
for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
{ {
printf("continuing...\n"); if(NULL == tlsc)
continue; {
translen = recv(sockd, recvbufp, BUFSIZ, 0);
} }
else else
{ {
gotheader = 1; translen = tls_read(tlsc, recvbufp, BUFSIZ);
}
if(param[V])
{
fprintf(stderr, "recv: %d bytes\n", translen);
}
/* parsing here? */
if(!strncmp(uristruct.proto, "http", 4) && !gotheader)
{
switch(resp_parse_http(recvbufp))
{
case -1:
if(param[V])
{
fprintf(stderr, "Response header parsing unnecessary, moving on...\n");
}
break;
case 200:
/* by now we have the first transmission from the server that we actually care about */
/* we just need to get to the end of the headers, now that we're done with 'em */
if(param[V])
{
fprintf(stderr, "200 OKAY, moving to end of header...\n");
}
for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
{
fprintf(stderr, "Searching to end of header...\n");
if(NULL != tlsc)
{
tls_read(tlsc, recvbufp, BUFSIZ);
}
else
{
recv(sockd, recvbufp, BUFSIZ, 0);
}
}
/* move forward four to get past the delimiter */
/* todo: add error checking in the case of never receiving a delimiter */
offsetp += 4; offsetp += 4;
printf("got the header\n"); gotheader = 1;
break;
/* intentional drop through from 301 to 302 */
case 301:
case 302:
urip = http_header_extract("ocation", recvbufp);
if(param[V])
{
fprintf(stderr, "Redirecting to %s%s%s...", uristruct.proto, uristruct.fqdn, uristruct.path);
}
redirnum++;
goto start;
case 400:
errstr = "400 Bad Request. Internal apport error?";
goto err;
default:
fprintf(stdout, "%s", recvbufp);
errstr = "invalid response from server.";
goto err;
} }
} }
if( !filed && !(filed = fopen(outpath, "w")))
if( !filed && NULL == (filed = fopen(outpath, "w")))
{ {
errstr = "couldn't open file"; errstr = "couldn't open file.";
goto err; goto err;
} }
fwrite(offsetp, sizeof(char), (translen - (offsetp - recvbuf)), filed); i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
offsetp = recvbuf; if(param[V] >= 2)
fprintf(stdout, "%s", recvbufp);
if(param[V])
{
fprintf(stderr, "fwrite: %d bytes\n", i);
} }
free(recvbuf);
offsetp = recvbufp;
}
close(sockd); close(sockd);
fclose(filed); fclose(filed);
free(sendbufp);
free(recvbufp);
break;
}
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
usage:
fprintf(stderr, "usage: %s [-qvb] [-o file] uri\n", argv[0]);
exit(EXIT_FAILURE);
err: err:
fprintf(stderr, "%s: %s\n", argv[0], errstr); fprintf(stderr, "%s: %s\n", argv[0], errstr);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
usage:
fprintf(stderr, "usage: %s: %s\n", argv[0], errstr);
exit(EXIT_FAILURE);
} }

View File

@ -32,10 +32,20 @@ char *reqgen(uri *urip)
return(NULL); return(NULL);
} }
void throw(int errcode) /* takes a data buffer and returns an integer corresponding to the server's response value */
/* if not applicable, return -1 */
/* if not found, return 0 */
int resp_parse(char *data, uri *uristruct)
{ {
fprintf(stderr, "apport: error %d.\n", errcode); if(strncmp("http", uristruct->proto, 4))
exit(errcode); {
return(resp_parse_http(data));
}
else
{
return(-1);
}
} }
/* return a pointer to a character array on the heap consisting of all bytes */ /* return a pointer to a character array on the heap consisting of all bytes */
@ -46,7 +56,7 @@ char *substr_extract(const char *str, int start, int end)
char *substr; char *substr;
substr_len = (end - start); substr_len = (end - start);
substr = 0; substr = NULL;
/* account for zero index plus the nullterm */ /* account for zero index plus the nullterm */
if( !(substr = malloc((substr_len + 1)))) if( !(substr = malloc((substr_len + 1))))
@ -58,3 +68,26 @@ char *substr_extract(const char *str, int start, int end)
return(substr); return(substr);
} }
/* mastrcat -- improved string concat function. returns a pointer to the first element in a buffer containing the strings str1 */
/* and str2 joined end-to-end. */
/* mastrcat -- allocate a new string holding str1 followed by str2 */
/* str1, str2 -- null-terminated strings; neither is modified */
/* return value -- NULL if either argument is NULL or the allocation fails; */
/* otherwise a null-terminated heap buffer the caller must free */
char *mastrcat(char *str1, char *str2)
{
    size_t len1, len2;
    char *nbuf;

    if( NULL == str1 || NULL == str2)
    {
        return(NULL);
    }
    len1 = strlen(str1);
    len2 = strlen(str2);
    /* the extra byte holds the terminator, which the two lengths don't count */
    if( NULL == (nbuf = malloc(len1 + len2 + 1)))
    {
        return(NULL);
    }
    memcpy(nbuf, str1, len1);
    memcpy(nbuf + len1, str2, len2);
    nbuf[len1 + len2] = '\0';
    return(nbuf);
}

View File

@ -35,5 +35,6 @@ typedef struct
char *reqgen(uri *urip); char *reqgen(uri *urip);
void throw(int errcode); int resp_parse(char *data, uri *uristruct);
char *substr_extract(const char *str, int start, int end); char *substr_extract(const char *str, int start, int end);
char *mastrcat(char *str1, char *str2);

View File

@ -7,26 +7,38 @@
/* exact one right now. anyway, it's a little too general for my tastes, */ /* exact one right now. anyway, it's a little too general for my tastes, */
/* but the one I came up with was trash (unsurprisingly) so here we are. */ /* but the one I came up with was trash (unsurprisingly) so here we are. */
/* need to modify this in the future to be less liberal... */ /* need to modify this in the future to be less liberal... */
#define REGEX_URI "^([^:/?#]+)://(([^/?#]*)+([^?#]*))(\\?([^#]*))?(#(.*))?" #define REGEX_URI_OLD "^([^:/?#]+)://(([^/?#]+)+([^?#]*))(\\?([^#]*))?(#(.*))?"
#define REGEX_URI_RFC "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"
#define REGEX_URI_NEW "^(([^:/?#]+):)?(//([^/?#]+))?([^?#]*)(\\?([^#]*))?(#(.*))?"
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
#define SUBSTR_COUNT 8 #define SUBSTR_COUNT 9
#define PROTO 1 #define PROTO 2
#define FQDN 3 #define FQDN 4
#define PATH 4 #define PATH 5
int uri_parse(const char *uristr, uri *res) int uri_parse(const char *uristr, uri *res)
{ {
int i;
int regerrcode;
char validp; char validp;
char *pathp;
regex_t regexp; regex_t regexp;
regmatch_t match[SUBSTR_COUNT]; regmatch_t match[SUBSTR_COUNT+5];
char errbuf[BUFSIZ] = {0};
validp = 0; validp = 0;
regcomp(&regexp, REGEX_URI, REG_EXTENDED); if( (regerrcode = regcomp(&regexp, REGEX_URI, REG_EXTENDED)))
if( (validp = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0))) {
regerror(regerrcode, &regexp, errbuf, BUFSIZ);
fprintf(stderr, "regular expression error: %s\n", errbuf);
return(1);
}
if( REG_NOMATCH == (validp = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0)))
{ {
return(1); return(1);
} }
@ -37,7 +49,6 @@ int uri_parse(const char *uristr, uri *res)
/* with experience. */ /* with experience. */
res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo); res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo); res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);
/* if the difference below is less than 1, our path doesn't exist. */ /* if the difference below is less than 1, our path doesn't exist. */
/* Compensate by setting it to '/' which will always return a root */ /* Compensate by setting it to '/' which will always return a root */
/* document from an HTTP server -- and, presumably, others. We'll */ /* document from an HTTP server -- and, presumably, others. We'll */
@ -48,7 +59,18 @@ int uri_parse(const char *uristr, uri *res)
} }
else else
{ {
/* we only have a simple path */
res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo); res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
/* /\* we have a more complex path *\/ */
/* if(0 != match[PATH+1].rm_so) */
/* { */
/* for(i = PATH; 0 != match[i].rm_so && i <= SUBSTR_COUNT; i++) */
/* { */
/* /\* memory leak here that needs to be addressed *\/ */
/* res->path = mastrcat(res->path, substr_extract(uristr, match[i].rm_so, match[i].rm_eo)); */
/* } */
/* } */
} }