See CHANGES.

This commit is contained in:
Mid Favila 2022-12-13 13:12:07 -04:00
parent 61415b6069
commit 8bba6e9f60
9 changed files with 435 additions and 185 deletions

15
CHANGES
View File

@ -1,3 +1,16 @@
2022/12/13 - v0.3.1b
-Fixed Gopher support
-Improved URI parsing
2022/12/12 - v0.3b
-Crashes in new, exciting ways!
-I couldn't get it to segfault, but ymmv.
-Has TLS still. It's less trash this time but still not great.
-Still smaller than hurl, still more readable.
-Still has Gopher, but the new main() breaks support. Will fix.
-Apparently doesn't work on BSD. Will get around to this.
-<<<!!!NEW!!!>>> Has HTTP redirect capabilities. Woaoaoaoao.
2022/12/7 - v0.2b
-It did segfault. Segfaults less now.
-Still has TLS.
@ -10,4 +23,4 @@
Features:
-Doesn't segfault. Probably.
-Has TLS.
-It's smaller and more readable than hurl. Nyaaah.
-It's smaller and more readable than hurl. Nyaaah

View File

@ -1,6 +1,8 @@
.POSIX:
LIBS = -ltls
SSL = bearssl
LIBS = -ltls -l$(SSL)
OBJ = \
main.c \

View File

@ -1,9 +1,10 @@
#include "headers.h"
int dial(const char *fqdn, const char *proto, struct tls **tlsres)
{
int sd;
struct addrinfo *ainfo;
{
int sd;
struct addrinfo *ainfo;
struct tls_config *tlshints;
@ -17,7 +18,7 @@ int dial(const char *fqdn, const char *proto, struct tls **tlsres)
return(0);
}
if(connect(sd, ainfo->ai_addr, sizeof(struct sockaddr)))
if(connect(sd, ainfo->ai_addr, ainfo->ai_addrlen))
{
return(0);
}

View File

@ -22,3 +22,51 @@ int reqgen_http(const char *path, const char *fqdn, char **nbuf)
return(0);
}
/* resp_parse_http -- extract the numeric status code from an HTTP status line */
/* data -- null-terminated buffer that starts with a status line, e.g. "HTTP/1.1 200 OK" */
/* return value -- the three characters following the version field converted with */
/* atoi(), or 0 when data is NULL or too short to hold a status code */
int resp_parse_http(char *data)
{
	/* there are nine characters between the start of data and the first */
	/* character of the response code, which is itself three characters long */
	enum { CODE_OFFSET = 9, CODE_LEN = 3 };
	/* small fixed buffer on the stack: nothing to allocate, nothing to leak */
	char code[CODE_LEN + 1] = {0};
	int i;

	if(NULL == data)
	{
		return(0);
	}
	/* make sure the terminator does not come before the end of the code, */
	/* otherwise the copy below would read past the end of the string */
	for(i = 0; i < CODE_OFFSET + CODE_LEN; i++)
	{
		if('\0' == data[i])
		{
			return(0);
		}
	}
	memcpy(code, data + CODE_OFFSET, CODE_LEN);
	return(atoi(code));
}
/* http_header_extract -- return a copy of the value that follows key in data */
/* key -- null-terminated string to search for (plain substring search, case-sensitive) */
/* data -- null-terminated buffer containing an http response header */
/* return value -- NULL if key is not found, an argument is NULL, or allocation fails; */
/* otherwise a freshly allocated, null-terminated copy of the value, which the caller */
/* must free() */
char *http_header_extract(char *key, char *data)
{
	char *valp, *valp_end;
	char *returnp;
	size_t vallen;

	if(NULL == key || NULL == data)
	{
		return(NULL);
	}
	if(NULL == (valp = strstr(data, key)))
	{
		return(NULL);
	}
	/* move up to the ':' separating the key from its value */
	for(; '\0' != *valp && ':' != *valp; valp++);
	/* skip the ':' and anything else that is not alphanumeric. */
	/* NOTE(review): this also strips a leading '/' from a relative value such */
	/* as "Location: /path" -- confirm whether that is intended. */
	/* the cast keeps isalnum() defined for bytes above 127 */
	for(; '\0' != *valp && !isalnum((unsigned char) *valp); valp++);
	/* the value runs until the carriage return ending the line (or end of data) */
	for(valp_end = valp; '\0' != *valp_end && '\r' != *valp_end; valp_end++);
	vallen = (size_t) (valp_end - valp);
	/* one extra zeroed byte so the result is always null-terminated */
	if(NULL == (returnp = calloc(vallen + 1, sizeof(char))))
	{
		return(NULL);
	}
	memcpy(returnp, valp, vallen);
	return(returnp);
}

View File

@ -1,2 +1,19 @@
int reqgen_http(const char *path, const char *fqdn, char **nbuf);int http_content_length(char *data);
char *http_skip_header(char *data, int recvlen);
int resp_parse_http(char *data);
/* http_header_extract -- function to return value corresponding to key */
/* key -- null-terminated buffer containing a key to retrieve a value of */
/* data -- null-terminated buffer containing a valid http response header */
/* return value -- NULL on failure, otherwise value corresponding to key in null-terminated buffer */
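/* example (uri_parse returns 0 on success; the returned value must be freed by the caller) --
value = http_header_extract("Location", recvbuf);
if(NULL != value && 0 == uri_parse(value, &uristruct))
{
printf("redirecting to: %s\n", uristruct.fqdn);
}
else
{
printf("failed to find a valid redirect. bailing out\n");
exit(EXIT_FAILURE);
}
*/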
char *http_header_extract(char *key, char *data);

View File

@ -22,6 +22,13 @@ enum
V
};
enum
{
/* REDIR_LIM -- upper bound for the redirect counter in main(): every 301/302 */
/* response increments the counter, and the request loop only runs while the */
/* counter is below this value */
/* change this to alter how many redirect attempts should be tried */
/* todo: make this a command-line parameter? */
REDIR_LIM = 10
};
char param[4] = {0};
/* below is to accompany param[O] */
@ -29,17 +36,18 @@ char *outpath;
int main(int argc, char **argv)
{
int i, translen, gotheader;
int i, translen, redirnum;
int sockd;
char *recvbuf;
char *recvbufp;
char *sendbufp, *offsetp, *errstr;
char *urip;
FILE *filed;
struct tls *tlsc;
uri uristruct;
i = translen = sockd = 0;
sendbufp = offsetp = 0;
if( (recvbuf = malloc(BUFSIZ+1)) == NULL)
if( (recvbufp = malloc(BUFSIZ+1)) == NULL)
{
errstr = "failed to init";
goto err;
@ -48,7 +56,10 @@ int main(int argc, char **argv)
filed = 0;
tlsc = NULL;
if(argc == 1)
{
goto usage;
}
for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
{
switch(i)
@ -77,140 +88,242 @@ int main(int argc, char **argv)
break;
default:
errstr = "[-vb] [-q] [-o <file>] uri";
goto usage;
}
}
/* we should probably modify uri_parse to return a zero value on failure... */
if(uri_parse(argv[optind], &uristruct))
{
errstr = "[-q|[-v -b]] [-o <file>] uri";
goto usage;
}
/* if outpath isn't set because we haven't received a -o param, */
/* then we should set the outpath to the final component of the */
/* URI. */
/* If we can't do that, we should just set it to 'default'. */
if(NULL == outpath && !strlen((outpath = (1 + strrchr(uristruct.path, '/')))))
{
outpath = "default";
}
/* generate our request */
if( !(sendbufp = reqgen(&uristruct)))
{
errstr = "Unknown protocol.";
goto err;
}
if(param[V])
{
fprintf(stderr, "request: %s\n", sendbufp);
}
/* having a routine or a global variable to track whether we need TLS would be */
/* nice to add in the future */
/* could probably store whether tls is necessary in a char within uristruct... */
/* Note: at the moment, if the TLS pointer passed is non-NULL, */
/* dial's return code can only be treated as an indicator of success. */
if(!strcmp("https", uristruct.proto))
{
sockd = dial(uristruct.fqdn, uristruct.proto, &tlsc);
}
else
{
sockd = dial(uristruct.fqdn, uristruct.proto, NULL);
}
if(!sockd)
{
errstr = "Couldn't connect to the host using the specified protocol.";
goto err;
}
if(tlsc)
{
translen = tls_write(tlsc, sendbufp, strlen(sendbufp));
}
else
{
translen = send(sockd, sendbufp, strlen(sendbufp), 0);
}
if(translen)
{
if(param[V])
{
printf("send: %d bytes\n", translen);
}
}
else
{
errstr = "Couldn't transmit data.";
goto err;
}
urip = argv[optind];
/* now for a slightly more complex version of the same routine: until we've encountered the */
/* delimiter, "\r\n\r\n", don't write to disk. Once we have, calculate the size of the body, */
/* then write that number of bytes to disk starting from the offset. Then, write everything */
/* until end of transmission. */
for(gotheader = 0, offsetp = NULL, translen = 1; translen > 0; memset(recvbuf, 0, BUFSIZ+1))
{
if( NULL == tlsc)
{
printf("recv: %d bytes\n", translen = recv(sockd, recvbuf, BUFSIZ, 0));
}
else
{
printf("recv: %d bytes\n", translen = tls_read(tlsc, recvbuf, BUFSIZ));
}
/* if we haven't gotten the header and our delimiter isn't in the */
/* received string, we're getting a multipart header and need to */
/* skip over it. */
/* right now we assume HTTP/S -- this is undesirable... */
if( strcmp("gopher", uristruct.proto) && !gotheader)
{
if( NULL == (offsetp = strstr(recvbuf, "\r\n\r\n")))
{
printf("continuing...\n");
continue;
}
else
{
gotheader = 1;
offsetp += 4;
printf("got the header\n");
}
}
for(redirnum = 0; redirnum < REDIR_LIM;)
{
start:
if( !filed && !(filed = fopen(outpath, "w")))
if(NULL != tlsc)
{
errstr = "couldn't open file";
tls_free(tlsc);
tlsc = NULL;
}
/* I don't care *what* you say, the system should *never* in a sane context return 0, 1, or 2 as a valid file descriptor */
/* libc reserves these */
if(2 < sockd)
{
close(sockd);
sockd = 0;
}
if(filed)
{
fclose(filed);
filed = NULL;
}
if(sendbufp)
{
free(sendbufp);
sendbufp = NULL;
}
/* todo: init uristruct as well */
if( uri_parse(urip, &uristruct))
{
errstr = "couldn't parse URI.";
goto err;
}
fwrite(offsetp, sizeof(char), (translen - (offsetp - recvbuf)), filed);
offsetp = recvbuf;
if(param[V])
{
fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
}
/* we should probably modify uri_parse to return a zero value on failure... */
/* make errstr display the URI -- need mastrcat? */
if( NULL == (sendbufp = reqgen(&uristruct)))
{
errstr = "couldn't generate request. Unknown protocol?";
goto err;
}
/* if outpath isn't set because we haven't received a -o param, */
/* then we should set the outpath to the final component of the */
/* URI. */
/* If we can't do that, we should just set it to 'default'. */
/* todo: maybe add a check to ensure we don't overwrite? but if */
/* the user tells us to, who are we to question them? */
if(NULL == outpath && !strlen((outpath = (1 + strrchr(uristruct.path, '/')))))
{
outpath = "default";
}
/* todo: handle TLS a little better. use a global variable, move TLS handling out of dial? */
if(param[V])
{
fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
}
/* definitely going to break tls out of dial, this is really bad */
if(!strcmp("https", uristruct.proto))
{
if(!dial(uristruct.fqdn, uristruct.proto, &tlsc))
{
errstr = "failed to connect";
goto err;
}
if(param[V])
{
fprintf(stderr, "setting up TLS...\n");
}
}
else
{
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto, NULL)))
{
errstr = "failed to connect";
goto err;
}
}
if(param[V])
{
fprintf(stderr, "Sending request...\n %s\n", sendbufp);
}
if(NULL != tlsc)
{
i = tls_write(tlsc, sendbufp, strlen(sendbufp));
}
else
{
i = send(sockd, sendbufp, strlen(sendbufp), 0);
}
if(param[V])
{
fprintf(stderr, "sent: %d bytes\n", i);
}
/* actual read loop */
int gotheader;
for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
{
if(NULL == tlsc)
{
translen = recv(sockd, recvbufp, BUFSIZ, 0);
}
else
{
translen = tls_read(tlsc, recvbufp, BUFSIZ);
}
if(param[V])
{
fprintf(stderr, "recv: %d bytes\n", translen);
}
/* parsing here? */
if(!strncmp(uristruct.proto, "http", 4) && !gotheader)
{
switch(resp_parse_http(recvbufp))
{
case -1:
if(param[V])
{
fprintf(stderr, "Response header parsing unnecessary, moving on...\n");
}
break;
case 200:
/* by now we have the first transmission from the server that we actually care about */
/* we just need to get to the end of the headers, now that we're done with 'em */
if(param[V])
{
fprintf(stderr, "200 OKAY, moving to end of header...\n");
}
for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
{
fprintf(stderr, "Searching to end of header...\n");
if(NULL != tlsc)
{
tls_read(tlsc, recvbufp, BUFSIZ);
}
else
{
recv(sockd, recvbufp, BUFSIZ, 0);
}
}
/* move forward four to get past the delimiter */
/* todo: add error checking in the case of never receiving a delimiter */
offsetp += 4;
gotheader = 1;
break;
/* intentional drop through from 301 to 302 */
case 301:
case 302:
urip = http_header_extract("ocation", recvbufp);
if(param[V])
{
fprintf(stderr, "Redirecting to %s%s%s...", uristruct.proto, uristruct.fqdn, uristruct.path);
}
redirnum++;
goto start;
case 400:
errstr = "400 Bad Request. Internal apport error?";
goto err;
default:
fprintf(stdout, "%s", recvbufp);
errstr = "invalid response from server.";
goto err;
}
}
if( !filed && NULL == (filed = fopen(outpath, "w")))
{
errstr = "couldn't open file.";
goto err;
}
i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
if(param[V] >= 2)
fprintf(stdout, "%s", recvbufp);
if(param[V])
{
fprintf(stderr, "fwrite: %d bytes\n", i);
}
offsetp = recvbufp;
}
close(sockd);
fclose(filed);
free(sendbufp);
free(recvbufp);
break;
}
free(recvbuf);
close(sockd);
fclose(filed);
exit(EXIT_SUCCESS);
usage:
fprintf(stderr, "usage: %s [-qvb] [-o file] uri\n", argv[0]);
exit(EXIT_FAILURE);
err:
fprintf(stderr, "%s: %s\n", argv[0], errstr);
exit(EXIT_FAILURE);
usage:
fprintf(stderr, "usage: %s: %s\n", argv[0], errstr);
exit(EXIT_FAILURE);
}

View File

@ -32,29 +32,62 @@ char *reqgen(uri *urip)
return(NULL);
}
/* throw -- print a numeric error code to stderr and terminate the program */
/* errcode -- number shown in the message; also used as the process exit status */
/* return value -- none, this function never returns */
void throw(int errcode)
{
	const int status = errcode;

	fprintf(stderr, "apport: error %d.\n", status);
	exit(status);
}
/* resp_parse -- protocol-aware wrapper around the response parsers */
/* data -- buffer holding the start of the server's response */
/* uristruct -- parsed URI; only its proto field is read here */
/* return value -- the server's response code when the protocol starts with "http" */
/* (covers both http and https), as reported by resp_parse_http (0 if not found); */
/* -1 when response parsing is not applicable to the protocol */
int resp_parse(char *data, uri *uristruct)
{
	/* strncmp returns 0 on a match, so the comparison must be explicit: */
	/* testing the raw return value selects every protocol EXCEPT http */
	if(0 == strncmp("http", uristruct->proto, 4))
	{
		return(resp_parse_http(data));
	}
	return(-1);
}
/* substr_extract -- copy the bytes of str in the half-open range [start, end) */
/* str -- source buffer; the caller guarantees that start..end lies inside it */
/* start -- offset of the first byte to copy */
/* end -- offset one past the last byte to copy */
/* return value -- NULL if str is NULL or allocation fails; otherwise a freshly */
/* allocated, null-terminated copy that the caller must free(). A negative start */
/* or an end that is not greater than start yields an empty string; this covers */
/* the -1/-1 offsets regexec reports for a group that did not take part in a match */
char *substr_extract(const char *str, int start, int end)
{
	size_t substr_len;
	char *substr;

	if(NULL == str)
	{
		return(NULL);
	}
	substr_len = 0;
	if(start >= 0 && end > start)
	{
		substr_len = (size_t) (end - start);
	}
	/* one extra byte for the null terminator */
	if(NULL == (substr = malloc(substr_len + 1)))
	{
		return(NULL);
	}
	if(0 != substr_len)
	{
		memcpy(substr, str + start, substr_len);
	}
	/* malloc does not zero memory, so terminate explicitly */
	substr[substr_len] = '\0';
	return(substr);
}
/* mastrcat -- improved string concat function. */
/* str1 -- null-terminated string placed first in the result */
/* str2 -- null-terminated string appended directly after str1 */
/* return value -- NULL if either argument is NULL or allocation fails; otherwise a */
/* freshly allocated, null-terminated buffer containing str1 and str2 joined */
/* end-to-end. Neither input is modified or freed; the caller must free() the result */
char *mastrcat(char *str1, char *str2)
{
	size_t len1, len2;
	char *nbuf;

	if(NULL == str1 || NULL == str2)
	{
		return(NULL);
	}
	len1 = strlen(str1);
	len2 = strlen(str2);
	/* one extra byte so the joined string can be null-terminated */
	if(NULL == (nbuf = malloc(len1 + len2 + 1)))
	{
		return(NULL);
	}
	memcpy(nbuf, str1, len1);
	memcpy(nbuf + len1, str2, len2);
	nbuf[len1 + len2] = '\0';
	return(nbuf);
}

View File

@ -35,5 +35,6 @@ typedef struct
char *reqgen(uri *urip);
void throw(int errcode);
int resp_parse(char *data, uri *uristruct);
char *substr_extract(const char *str, int start, int end);
char *mastrcat(char *str1, char *str2);

View File

@ -7,54 +7,76 @@
/* exact one right now. anyway, it's a little too general for my tastes, */
/* but the one I came up with was trash (unsurprisingly) so here we are. */
/* need to modify this in the future to be less liberal... */
#define REGEX_URI "^([^:/?#]+)://(([^/?#]*)+([^?#]*))(\\?([^#]*))?(#(.*))?"
#define REGEX_URI_OLD "^([^:/?#]+)://(([^/?#]+)+([^?#]*))(\\?([^#]*))?(#(.*))?"
#define REGEX_URI_RFC "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"
#define REGEX_URI_NEW "^(([^:/?#]+):)?(//([^/?#]+))?([^?#]*)(\\?([^#]*))?(#(.*))?"
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
#define SUBSTR_COUNT 8
#define PROTO 1
#define FQDN 3
#define PATH 4
#define SUBSTR_COUNT 9
#define PROTO 2
#define FQDN 4
#define PATH 5
/* uri_parse -- split a URI string into protocol, host and path */
/* uristr -- null-terminated URI, e.g. "https://example.com/file" */
/* res -- structure whose proto, fqdn and path fields are filled in */
/* return value -- 0 on success, 1 on failure (NULL argument, regex error, no match, */
/* missing protocol or host component, or failed allocation) */
/* notes -- proto and fqdn are allocated by substr_extract. path is allocated the same */
/* way unless the URI has no path, in which case it points at the string literal "/" */
/* and must not be modified or freed. On an allocation failure the fields that were */
/* already filled in are left as they are. */
int uri_parse(const char *uristr, uri *res)
{
	int regerrcode;
	regex_t regexp;
	regmatch_t match[SUBSTR_COUNT];
	char errbuf[BUFSIZ] = {0};

	if(NULL == uristr || NULL == res)
	{
		return(1);
	}

	if( (regerrcode = regcomp(&regexp, REGEX_URI, REG_EXTENDED)))
	{
		regerror(regerrcode, &regexp, errbuf, sizeof(errbuf));
		fprintf(stderr, "regular expression error: %s\n", errbuf);
		return(1);
	}

	/* the offsets in match[] refer to uristr, not to the compiled pattern, so the */
	/* pattern can be released as soon as matching is done */
	regerrcode = regexec(&regexp, uristr, SUBSTR_COUNT, match, 0);
	regfree(&regexp);

	/* any non-zero result is a failure, not only REG_NOMATCH */
	if(0 != regerrcode)
	{
		return(1);
	}

	/* the protocol and host groups are optional in the pattern; a group that did */
	/* not take part in the match has offsets of -1, which must never be used to */
	/* index into uristr */
	if(match[PROTO].rm_so < 0 || match[FQDN].rm_so < 0)
	{
		return(1);
	}

	res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
	res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);

	/* if the difference below is less than 1, our path doesn't exist. */
	/* Compensate by setting it to '/' which will always return a root */
	/* document from an HTTP server -- and, presumably, others. */
	if((match[PATH].rm_eo - match[PATH].rm_so) < 1)
	{
		res->path = "/";
	}
	else
	{
		res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
	}

	if(NULL == res->proto || NULL == res->fqdn || NULL == res->path)
	{
		return(1);
	}
	return(0);
}