diff --git a/CHANGES b/CHANGES index 5fd601b..2128d6b 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,16 @@ +2022/12/13 - v0.3.1b + -Fixed Gopher support + -Improved URI parsing + +2022/12/12 - v0.3b + -Crashes in new, exciting ways! + -I couldn't get it to segfault, but ymmv. + -Has TLS still. It's less trash this time but still not great. + -Still smaller than hurl, still more readable. + -Still has Gopher, but the new main() breaks support. Will fix. + -Apparently doesn't work on BSD. Will get around to this. + -<<>> Has HTTP redirect capabilities. Woaoaoaoao. + 2022/12/7 - v0.2b -It did segfault. Segfaults less now. -Still has TLS. @@ -10,4 +23,4 @@ Features: -Doesn't segfault. Probably. -Has TLS. - -It's smaller and more readable than hurl. Nyaaah. \ No newline at end of file + -It's smaller and more readable than hurl. Nyaaah diff --git a/src/Makefile b/src/Makefile index 26e5be6..9f1f933 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,8 @@ .POSIX: -LIBS = -ltls +SSL = bearssl + +LIBS = -ltls -l$(SSL) OBJ = \ main.c \ diff --git a/src/connect.c b/src/connect.c index 570c1a4..d353caf 100644 --- a/src/connect.c +++ b/src/connect.c @@ -1,9 +1,10 @@ #include "headers.h" + int dial(const char *fqdn, const char *proto, struct tls **tlsres) - { - int sd; - struct addrinfo *ainfo; + { + int sd; + struct addrinfo *ainfo; struct tls_config *tlshints; @@ -17,7 +18,7 @@ int dial(const char *fqdn, const char *proto, struct tls **tlsres) return(0); } - if(connect(sd, ainfo->ai_addr, sizeof(struct sockaddr))) + if(connect(sd, ainfo->ai_addr, ainfo->ai_addrlen)) { return(0); } diff --git a/src/http.c b/src/http.c index 831a22a..7d81fed 100644 --- a/src/http.c +++ b/src/http.c @@ -22,3 +22,51 @@ int reqgen_http(const char *path, const char *fqdn, char **nbuf) return(0); } + +int resp_parse_http(char *data) + { + char *buf; + + if( NULL == (buf = calloc(4, sizeof(char)))) + { + goto err; + } + + /* there are nine characters between the start of data and the first character of the response code */ + data += 9; + memcpy(buf, data, 3); + return(atoi(buf)); + + err: + return(0); + } + +char *http_header_extract(char *key, char *data) + { + char *keyp, *keyp_end; + char *returnp; + + if( NULL == (keyp = strstr(data, key))) + { + goto err; + } + else + { + for(; 0 != *keyp && *keyp != ':'; keyp++); + for(; 0 != *keyp && !isalnum(*keyp); keyp++); + for(keyp_end = keyp; 0 != *keyp_end && '\r' != *keyp_end; keyp_end++); + + if( NULL == (returnp = calloc((int) (keyp_end - keyp), sizeof(char)))) + { + goto err; + } + + memcpy(returnp, keyp, (keyp_end - keyp)); + } + + + return(returnp); + + err: + return(NULL); + } diff --git a/src/http.h b/src/http.h index dc311ac..055b474 100644 --- a/src/http.h +++ b/src/http.h @@ -1,2 +1,19 @@ int reqgen_http(const char *path, const char *fqdn, char **nbuf);int http_content_length(char *data); -char *http_skip_header(char *data, int recvlen); +int resp_parse_http(char *data); + +/* http_header_extract -- function to return value corresponding to key */ +/* key -- null-terminated buffer containing a key to retrieve a value of */ +/* data -- null-terminated buffer containing a valid http response header */ +/* return value -- NULL on failure, otherwise value corresponding to key in null-terminated buffer */ +/* example -- + if(uri_parse(http_header_extract("Location", recvbuf), &uri)) + { + printf("redirecting to: %s\n", uri->host); + } + else + { + printf("failed to find a valid redirect. bailing out\n"); + exit(); + } +*/ +char *http_header_extract(char *key, char *data); diff --git a/src/main.c b/src/main.c index e6c0220..1fa02df 100644 --- a/src/main.c +++ b/src/main.c @@ -22,6 +22,13 @@ enum V }; +enum + { + /* change this to alter how many redirect attempts should be tried */ + /* todo: make this a command-line parameter? */ + REDIR_LIM = 10 + }; + char param[4] = {0}; /* below is to accompany param[O] */ @@ -29,17 +36,18 @@ char *outpath; int main(int argc, char **argv) { - int i, translen, gotheader; + int i, translen, redirnum; int sockd; - char *recvbuf; + char *recvbufp; char *sendbufp, *offsetp, *errstr; + char *urip; FILE *filed; struct tls *tlsc; uri uristruct; i = translen = sockd = 0; sendbufp = offsetp = 0; - if( (recvbuf = malloc(BUFSIZ+1)) == NULL) + if( (recvbufp = malloc(BUFSIZ+1)) == NULL) { errstr = "failed to init"; goto err; @@ -48,7 +56,10 @@ int main(int argc, char **argv) filed = 0; tlsc = NULL; - + if(argc == 1) + { + goto usage; + } for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0) { switch(i) @@ -77,140 +88,242 @@ int main(int argc, char **argv) break; default: - errstr = "[-vb] [-q] [-o ] uri"; - goto usage; - } - } - - - /* we should probably modify uri_parse to return a zero value on failure... */ - if(uri_parse(argv[optind], &uristruct)) - { - errstr = "[-q|[-v -b]] [-o ] uri"; goto usage; - } - - /* if outpath isn't set because we haven't received a -o param, */ - /* then we should set the outpath to the final component of the */ - /* URI. */ - /* If we can't do that, we should just set it to 'default'. */ - if(NULL == outpath && !strlen((outpath = (1 + strrchr(uristruct.path, '/'))))) - { - outpath = "default"; - } - - /* generate our request */ - if( !(sendbufp = reqgen(&uristruct))) - { - errstr = "Unknown protocol."; - goto err; - } - - if(param[V]) - { - fprintf(stderr, "request: %s\n", sendbufp); - } - - /* having a routine or a global variable to track whether we need TLS would be */ - /* nice to add in the future */ - /* could probably store whether tls is necessary in a char within uristruct... */ - /* Note: at the moment, if the TLS pointer passed is non-NULL, */ - /* dial's return code can only be treated as an indicator of success. */ - - if(!strcmp("https", uristruct.proto)) - { - sockd = dial(uristruct.fqdn, uristruct.proto, &tlsc); - } - else - { - sockd = dial(uristruct.fqdn, uristruct.proto, NULL); - } - - if(!sockd) - { - errstr = "Couldn't connect to the host using the specified protocol."; - goto err; - } - - if(tlsc) - { - translen = tls_write(tlsc, sendbufp, strlen(sendbufp)); - } - else - { - translen = send(sockd, sendbufp, strlen(sendbufp), 0); - } - - if(translen) - { - if(param[V]) - { - printf("send: %d bytes\n", translen); } } - else - { - errstr = "Couldn't transmit data."; - goto err; - } + urip = argv[optind]; - /* now for a slightly more complex version of the same routine: until we've encountered the */ - /* delimiter, "\r\n\r\n", don't write to disk. Once we have, calculate the size of the body, */ - /* then write that number of bytes to disk starting from the offset. Then, write everything */ - /* until end of transmission. */ - for(gotheader = 0, offsetp = NULL, translen = 1; translen > 0; memset(recvbuf, 0, BUFSIZ+1)) - { - if( NULL == tlsc) - { - printf("recv: %d bytes\n", translen = recv(sockd, recvbuf, BUFSIZ, 0)); - } - else - { - printf("recv: %d bytes\n", translen = tls_read(tlsc, recvbuf, BUFSIZ)); - } - /* if we haven't gotten the header and our delimiter isn't in the */ - /* received string, we're getting a multipart header and need to */ - /* skip over it. */ - /* right now we assume HTTP/S -- this is undesirable... */ - if( strcmp("gopher", uristruct.proto) && !gotheader) - { - if( NULL == (offsetp = strstr(recvbuf, "\r\n\r\n"))) - { - printf("continuing...\n"); - continue; - } - else - { - gotheader = 1; - offsetp += 4; - printf("got the header\n"); - } - } + for(redirnum = 0; redirnum < REDIR_LIM;) + { + start: - if( !filed && !(filed = fopen(outpath, "w"))) + if(NULL != tlsc) { - errstr = "couldn't open file"; + tls_free(tlsc); + tlsc = NULL; + } + + /* I don't care *what* you say, the system should *never* in a sane context return 0, 1, or 2 as a valid file descriptor */ + /* libc reserves these */ + if(2 < sockd) + { + close(sockd); + sockd = 0; + } + + if(filed) + { + fclose(filed); + filed = NULL; + } + + if(sendbufp) + { + free(sendbufp); + sendbufp = NULL; + } + + /* todo: init uristruct as well */ + + + if( uri_parse(urip, &uristruct)) + { + errstr = "couldn't parse URI."; goto err; } - fwrite(offsetp, sizeof(char), (translen - (offsetp - recvbuf)), filed); - offsetp = recvbuf; + if(param[V]) + { + fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path); + } + + + /* we should probably modify uri_parse to return a zero value on failure... */ + /* make errstr display the URI -- need mastrcat? */ + if( NULL == (sendbufp = reqgen(&uristruct))) + { + errstr = "couldn't generate request. Unknown protocol?"; + goto err; + } + + /* if outpath isn't set because we haven't received a -o param, */ + /* then we should set the outpath to the final component of the */ + /* URI. */ + /* If we can't do that, we should just set it to 'default'. */ + /* todo: maybe add a check to ensure we don't overwrite? but if */ + /* the user tells us to, who are we to question them? */ + if(NULL == outpath && !strlen((outpath = (1 + strrchr(uristruct.path, '/'))))) + { + outpath = "default"; + } + + /* todo: handle TLS a little better. use a global variable, move TLS handling out of dial? */ + if(param[V]) + { + fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto); + } + + /* definitely going to break tls out of dial, this is really bad */ + if(!strcmp("https", uristruct.proto)) + { + if(!dial(uristruct.fqdn, uristruct.proto, &tlsc)) + { + errstr = "failed to connect"; + goto err; + } + + if(param[V]) + { + fprintf(stderr, "setting up TLS...\n"); + } + } + else + { + /* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */ + if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto, NULL))) + { + errstr = "failed to connect"; + goto err; + } + } + + if(param[V]) + { + fprintf(stderr, "Sending request...\n %s\n", sendbufp); + } + + if(NULL != tlsc) + { + i = tls_write(tlsc, sendbufp, strlen(sendbufp)); + } + else + { + i = send(sockd, sendbufp, strlen(sendbufp), 0); + } + + if(param[V]) + { + fprintf(stderr, "sent: %d bytes\n", i); + } + + /* actual read loop */ + int gotheader; + for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1)) + { + if(NULL == tlsc) + { + translen = recv(sockd, recvbufp, BUFSIZ, 0); + } + else + { + translen = tls_read(tlsc, recvbufp, BUFSIZ); + } + + if(param[V]) + { + fprintf(stderr, "recv: %d bytes\n", translen); + } + + /* parsing here? */ + if(!strncmp(uristruct.proto, "http", 4) && !gotheader) + { + switch(resp_parse_http(recvbufp)) + { + case -1: + if(param[V]) + { + fprintf(stderr, "Response header parsing unnecessary, moving on...\n"); + } + + break; + + case 200: + /* by now we have the first transmission from the server that we actually care about */ + /* we just need to get the the end of the headres, now that we're done with 'em */ + if(param[V]) + { + fprintf(stderr, "200 OKAY, moving to end of header...\n"); + } + + + for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));) + { + fprintf(stderr, "Searching to end of header...\n"); + if(NULL != tlsc) + { + tls_read(tlsc, recvbufp, BUFSIZ); + } + else + { + recv(sockd, recvbufp, BUFSIZ, 0); + } + } + + /* move forward four to get past the delimiter */ + /* todo: add error checking in the case of never receiving a delimiter */ + offsetp += 4; + gotheader = 1; + + break; + + /* intentional drop through from 301 to 302 */ + case 301: + case 302: + urip = http_header_extract("ocation", recvbufp); + if(param[V]) + { + fprintf(stderr, "Redirecting to %s%s%s...", uristruct.proto, uristruct.fqdn, uristruct.path); + } + redirnum++; + goto start; + + case 400: + errstr = "400 Bad Request. Internal apport error?"; + goto err; + + default: + fprintf(stdout, "%s", recvbufp); + errstr = "invalid response from server."; + goto err; + } + } + + + if( !filed && NULL == (filed = fopen(outpath, "w"))) + { + errstr = "couldn't open file."; + goto err; + } + + i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed); + if(param[V] >= 2) + fprintf(stdout, "%s", recvbufp); + + if(param[V]) + { + fprintf(stderr, "fwrite: %d bytes\n", i); + } + + offsetp = recvbufp; + } + + close(sockd); + fclose(filed); + free(sendbufp); + free(recvbufp); + + break; } - free(recvbuf); - close(sockd); - fclose(filed); - + exit(EXIT_SUCCESS); - + usage: + fprintf(stderr, "usage: %s [-qvb] [-o file] uri\n", argv[0]); + exit(EXIT_FAILURE); + err: fprintf(stderr, "%s: %s\n", argv[0], errstr); exit(EXIT_FAILURE); - - usage: - fprintf(stderr, "usage: %s: %s\n", argv[0], errstr); - exit(EXIT_FAILURE); - } diff --git a/src/support.c b/src/support.c index 8b6aa1a..73f4f5a 100644 --- a/src/support.c +++ b/src/support.c @@ -32,29 +32,62 @@ char *reqgen(uri *urip) return(NULL); } -void throw(int errcode) - { - fprintf(stderr, "apport: error %d.\n", errcode); - exit(errcode); - } +/* takes a data buffer and returns an integer corresponding to the server's response value */ +/* if not applicable, return -1 */ +/* if not found, return 0 */ +int resp_parse(char *data, uri *uristruct) + { + if(strncmp("http", uristruct->proto, 4)) + { + return(resp_parse_http(data)); + } + else + { + return(-1); + } + + } /* return a pointer to a character array on the heap consisting of all bytes */ /* between start and end in str. */ char *substr_extract(const char *str, int start, int end) - { - int substr_len; - char *substr; - - substr_len = (end - start); - substr = 0; - - /* account for zero index plus the nullterm */ - if( !(substr = malloc((substr_len + 1)))) { - return(NULL); - } + int substr_len; + char *substr; + + substr_len = (end - start); + substr = NULL; + + /* account for zero index plus the nullterm */ + if( !(substr = malloc((substr_len + 1)))) + { + return(NULL); + } memcpy(substr, str+start, substr_len); return(substr); } + +/* mastrcat -- improved string concat function. returns a pointer to the first element in a buffer containing the strings str1 */ +/* and str2 joined end-to-end. */ +char *mastrcat(char *str1, char *str2) + { + unsigned long int nbi, stri, nbsize; + char *nbuf; + nbi = stri = 0; + nbsize = (strlen(str1) + strlen(str2)); + nbuf = malloc(nbsize); + + for(stri = 0; str1[stri] != '\0'; nbi++, stri++) + { + nbuf[nbi] = str1[stri]; + } + + for(stri = 0; str2[stri] != '\0'; nbi++, stri++) + { + nbuf[nbi] = str2[stri]; + } + + return nbuf; + } diff --git a/src/support.h b/src/support.h index acd007e..ebd8e8f 100644 --- a/src/support.h +++ b/src/support.h @@ -35,5 +35,6 @@ typedef struct char *reqgen(uri *urip); -void throw(int errcode); +int resp_parse(char *data, uri *uristruct); char *substr_extract(const char *str, int start, int end); +char *mastrcat(char *str1, char *str2); diff --git a/src/uri.c b/src/uri.c index 91fbf2f..01bf903 100644 --- a/src/uri.c +++ b/src/uri.c @@ -7,54 +7,76 @@ /* exact one right now. anyway, it's a little too general for my tastes, */ /* but the one I came up with was trash (unsurprisingly) so here we are. */ /* need to modify this in the future to be less liberal... */ -#define REGEX_URI "^([^:/?#]+)://(([^/?#]*)+([^?#]*))(\\?([^#]*))?(#(.*))?" +#define REGEX_URI_OLD "^([^:/?#]+)://(([^/?#]+)+([^?#]*))(\\?([^#]*))?(#(.*))?" +#define REGEX_URI_RFC "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?" +#define REGEX_URI_NEW "^(([^:/?#]+):)?(//([^/?#]+))?([^?#]*)(\\?([^#]*))?(#(.*))?" +#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)" -#define SUBSTR_COUNT 8 -#define PROTO 1 -#define FQDN 3 -#define PATH 4 +#define SUBSTR_COUNT 9 +#define PROTO 2 +#define FQDN 4 +#define PATH 5 int uri_parse(const char *uristr, uri *res) - { - char validp; - regex_t regexp; - regmatch_t match[SUBSTR_COUNT]; + { + int i; + int regerrcode; + char validp; + char *pathp; + regex_t regexp; + regmatch_t match[SUBSTR_COUNT+5]; + char errbuf[BUFSIZ] = {0}; - validp = 0; + validp = 0; - regcomp(®exp, REGEX_URI, REG_EXTENDED); - if( (validp = regexec(®exp, uristr, SUBSTR_COUNT, match, 0))) - { - return(1); + if( (regerrcode = regcomp(®exp, REGEX_URI, REG_EXTENDED))) + { + regerror(regerrcode, ®exp, errbuf, BUFSIZ); + fprintf(stderr, "regular expression error: %s\n", errbuf); + return(1); + } + if( REG_NOMATCH == (validp = regexec(®exp, uristr, SUBSTR_COUNT, match, 0))) + { + return(1); + } + + + /* not very elegant but it does the job. i shouldn't be thinking about */ + /* elegance at this stage in my programming life, anyways... comes */ + /* with experience. */ + res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo); + res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo); + /* if the difference below is less than 1, our path doesn't exist. */ + /* Compensate by setting it to '/' which will always return a root */ + /* document from an HTTP server -- and, presumably, others. We'll */ + /* see, I suppose. */ + if((match[PATH].rm_eo - match[PATH].rm_so) < 1) + { + res->path = "/"; + } + else + { + /* we only have a simple path */ + res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo); + /* /\* we have a more complex path *\/ */ + /* if(0 != match[PATH+1].rm_so) */ + /* { */ + /* for(i = PATH; 0 != match[i].rm_so && i <= SUBSTR_COUNT; i++) */ + /* { */ + /* /\* memory leak here that needs to be addressed *\/ */ + /* res->path = mastrcat(res->path, substr_extract(uristr, match[i].rm_so, match[i].rm_eo)); */ + /* } */ + /* } */ + + } + + + return(0); } - /* not very elegant but it does the job. i shouldn't be thinking about */ - /* elegance at this stage in my programming life, anyways... comes */ - /* with experience. */ - res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo); - res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo); - - /* if the difference below is less than 1, our path doesn't exist. */ - /* Compensate by setting it to '/' which will always return a root */ - /* document from an HTTP server -- and, presumably, others. We'll */ - /* see, I suppose. */ - if((match[PATH].rm_eo - match[PATH].rm_so) < 1) - { - res->path = "/"; - } - else - { - res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo); - } - - - return(0); - } - -