See CHANGES.

2023-02-19 16:44:19 -04:00 · 2023-02-19 16:44:19 -04:00 · b35d49cc28
parent b5e1a30092
commit b35d49cc28
10 changed files with 184 additions and 184 deletions
--- a/6
+++ b/6
@ -1,3 +1,9 @@
 2023/02/19 - v0.3.2
    -Improved URI parsing again
    -Improved HTTP redirect handling capability
    -Moved TLS out of dial and into main
    -Improved debugging experience by shuffling around if(param[V])s 
 2022/12/13 - v0.3.1b
    -Fixed Gopher support
    -Improved URI parsing 
--- a/4
+++ b/4
@ -20,7 +20,7 @@ Right now, it retrieves data over HTTP or Gopher, and can support TLS,
 at least when it comes to HTTPS. I'd like a more general approach to
 TLS support in the future.
-Its source code is small (< 400 lines of C total according to cloc right
+Its source code is small (< 500 lines of C total according to cloc right
 now) and uses only POSIX routines -- there's no temptation to use GNU
 or BSD features because I develop against musl and tcc, and I don't even
 have the Linux manpages on any of my systems.
@ -36,7 +36,7 @@ Apportate also aims to have actually useful diagnostics; that is,
 compared to other tools, apportate aims to only provide useful error
 output. In the case of success, it follows the Rule of Silence; on
 unrecoverable errors, it aborts immediately. It supports multiple levels
-of verbosity, and exposes almost all of its internals.
+of verbosity, and exposes almost all of its internal operations for debugging.
 Its simple design and use should also make it relatively convenient
 for inclusion in shell scripts. 
--- a/src/connect.c
+++ b/src/connect.c
@ -1,19 +1,18 @@
 #include "headers.h"
-int dial(const char *fqdn, const char *proto, struct tls **tlsres)
+int dial(const char *fqdn, const char *proto)
    {
    int sd;
    struct addrinfo *ainfo; 
    struct tls_config *tlshints;
-    if(getaddrinfo(fqdn, proto, 0, &ainfo))
+    if( !(sd = socket(AF_INET, SOCK_STREAM, 0)))
 	{
 	return(0);
 	}
-    if( !(sd = socket(ainfo->ai_family, SOCK_STREAM, 0)))
+    if(getaddrinfo(fqdn, proto, 0, &ainfo))
 	{
 	return(0);
 	}
@ -23,34 +22,5 @@ int dial(const char *fqdn, const char *proto, struct tls **tlsres)
 	return(0);
 	}
    if(tlsres != 0)
 	{
 	close(sd);
 	if( 0 == (*tlsres = tls_client()))
 	    {
 	    goto err_ssl;
 	    }
 	if( 0 == (tlshints = tls_config_new()))
 	    {
 	    goto err_ssl;
 	    }
 	if(tls_configure(*tlsres, tlshints))
 	    {
 	    goto err_ssl;
 	    }
 	if( (tls_connect(*tlsres, fqdn, proto)))
 	    {
 	    goto err_ssl;
 	    }
 	}
    return(sd);
    err_ssl:
    return(0);
    }
--- a/src/connect.h
+++ b/src/connect.h
@ -8,4 +8,4 @@
 /*  sd -- successful connection returns a file descriptor connected to the fqdn */
 /* ERRCONN -- couldn't connect                                                  */
 /* ERRADDR -- couldn't get addrinfo                                             */
-int dial(const char *fqdn, const char *proto, struct tls **tls_res);
+int dial(const char *fqdn, const char *proto);
--- a/src/http.c
+++ b/src/http.c
@ -1,27 +1,27 @@
 #include "headers.h"
 const char REQ_HTTP[] = 
-  {
+    {
-  "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n"
+    "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n"
-  };
+    };
 int reqgen_http(const char *path, const char *fqdn, char **nbuf)
-  {
+    {
-  int buflen;
+    int buflen;
-  buflen = (strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1);
+    buflen = (strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1);
-  if( !(*nbuf = calloc(buflen, sizeof(char))))
+    if( !(*nbuf = calloc(buflen, sizeof(char))))
-      {
+	{
-      return(ERRMEM);
+	return(ERRMEM);
-      }
+	}
-  sprintf(*nbuf, REQ_HTTP, path, fqdn);
+    sprintf(*nbuf, REQ_HTTP, path, fqdn);
-  return(0);
+    return(0);
-  }
+    }
 int resp_parse_http(char *data)
    {
@ -41,32 +41,51 @@ int resp_parse_http(char *data)
    return(0);
    }
-char *http_header_extract(char *key, char *data)
+/* http_get_keyval - return the value corresponding to a key if it exists in an http response, otherwise null */
 /* key - key to extract corresponding value */
 /* data - response header from which to extract key's value */
 char *http_get_keyval(char *key, char *data)
    {
-    char *keyp, *keyp_end;
+    char *buf;
-    char *returnp;
+    /* data indices */
    char *d_ind, *d_ind2;
-    if( NULL == (keyp = strstr(data, key)))
+    buf = NULL;
-	{
+    d_ind = d_ind2 = data;
 	goto err;
 	}
    else
 	{
 	for(; 0 != *keyp && *keyp != ':'; keyp++);
 	for(; 0 != *keyp && !isalnum(*keyp); keyp++);
 	for(keyp_end = keyp; 0 != *keyp_end && '\r' != *keyp_end; keyp_end++);
-	if( NULL == (returnp = calloc((int) (keyp_end - keyp), sizeof(char))))
+
    /* we ensure that our key and each key we compare to are lower-case because some */
    /* servers will return mixed-case keys and others single-case. by doing this we */
    /* can use full key specifiers, which avoids false matches that would occur as a */
    /* result of using partial key specifiers e.g "location" instead of "ocation". */
    key = buftolower(key);
    for(;*data !=  '\0'; data++)
 	{
 	if(NULL != buf)
 	    {
-	    goto err;
+	    free(buf);
 	    buf = NULL;
 	    for(; '\n' != *(data - 1); data++);
 	    d_ind = d_ind2 = data;
 	    }
-	memcpy(returnp, keyp, (keyp_end - keyp));
+	for(; *d_ind != ':'; d_ind++);
 	buf = substr_extract(data, 0, (d_ind - data));
 	buf = buftolower(buf);
 	if(!strcmp(key, buf))
 	    {
 	    free(buf);
 	    for(data = d_ind+2; *(1+d_ind) != '\n'; d_ind++);
 	    buf = substr_extract(data, 0, (d_ind - data));
 	    return(buf);
 	    }
 	}
    return(returnp);
    err:
    return(NULL);
    }
--- a/src/http.h
+++ b/src/http.h
@ -16,4 +16,4 @@ int resp_parse_http(char *data);
   exit();
   }
 */
-char *http_header_extract(char *key, char *data);
+char *http_get_keyval(char *key, char *data);
--- a/src/main.c
+++ b/src/main.c
@ -36,7 +36,7 @@ char *outpath;
 int main(int argc, char **argv)
    {
-    int i, translen, redirnum;
+    int i, translen, redirnum, gotheader;
    int sockd;
    char *recvbufp;
    char *sendbufp, *offsetp, *errstr;
@ -60,6 +60,7 @@ int main(int argc, char **argv)
 	{
 	goto usage;
 	}
    for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
 	{
 	switch(i)
@ -80,7 +81,7 @@ int main(int argc, char **argv)
 		break;
 	    case 'v':
-		/* we handle v differently because we want to support different levels of verbosity */
+	 	/* we handle v differently because we want to support different levels of verbosity */
 		if(!param[Q])
 		    {
 		    param[V]++;
@ -88,7 +89,7 @@ int main(int argc, char **argv)
 		break;
 	    default:
-	goto usage;
+		goto usage;
 	    }
 	}
    urip = argv[optind];
@ -125,26 +126,37 @@ int main(int argc, char **argv)
 	    }
 	/* todo: init uristruct as well */
-
+	
-
+	uristruct.proto = NULL;
 	uristruct.fqdn  = NULL;
 	uristruct.path  = NULL;
 	if( uri_parse(urip, &uristruct))
-	    {
+		{
-	    errstr = "couldn't parse URI.";
+		errstr = "couldn't parse URI.";
-	    goto err;
+		goto err;
-	    }
+		}
-	if(param[V])
+	if(param[V] >= 2)
 	    {
 	    fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
 	    if(param[V] >= 3)
 		{
 		fprintf(stderr, "length of proto: %lu\nlength of fqdn: %lu\nlength of path: %lu\n", strlen(uristruct.proto), strlen(uristruct.fqdn), strlen(uristruct.path));
 		}
 	    }
 	sendbufp = reqgen(&uristruct);
 	/* we should probably modify uri_parse to return a zero value on failure... */
 	/* make errstr display the URI -- need mastrcat? */
 	if( NULL == (sendbufp = reqgen(&uristruct)))
 	    {
-	    errstr = "couldn't generate request. Unknown protocol?";
+	    fprintf(stderr, "couldn't generate request. Unknown protocol: %s?\n", uristruct.proto);
-	    goto err;
+	    exit(-1);
 	    }
 	/* if outpath isn't set because we haven't received a -o param, */
@ -158,57 +170,73 @@ int main(int argc, char **argv)
 	    outpath = "default";
 	    }
 	/* todo: handle TLS a little better. use a global variable, move TLS handling out of dial? */
 	if(param[V])
 	    {
 	    fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
 	    }
-	/* definitely going to break tls out of dial, this is really bad */
+	/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
-	if(!strcmp("https", uristruct.proto))
+	if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto)))
 	    {
-	    if(!dial(uristruct.fqdn, uristruct.proto, &tlsc))
+	    errstr = "failed to connect";
 	    goto err;
 	    }
 	/* todo: upgrade this to a more general mechanism */
 	if(!strncmp("https", uristruct.proto, 5))
 	    {
 	    if(NULL != tlsc)
 		{
-		errstr = "failed to connect";
+		tls_reset(tlsc);
 		goto err;
 		}
-	    if(param[V])
+	    struct tls_config *config = tls_config_new();
 	    tlsc = tls_client();
 	    tls_configure(tlsc, config);
 	    if(-1 == tls_connect_socket(tlsc, sockd, uristruct.fqdn))
 		{
-		fprintf(stderr, "setting up TLS...\n");
+		errstr = "failed to upgrade connection to use TLS, aborting\n";
 		}
 	    }
 	else
 	    {
 	    /* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
 	    if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto, NULL)))
 		{
 		errstr = "failed to connect";
 		goto err;
 		}
 	    }
-	if(param[V])
+	if(param[V] >= 2)
 	    {
-	    fprintf(stderr, "Sending request...\n %s\n", sendbufp);
+	    fprintf(stderr, "Sending request...\n-----REQUEST START-----\n%s\n-----REQUEST END-----\n", sendbufp);
 	    }
 	if(NULL != tlsc)
 	    {
-	    i = tls_write(tlsc, sendbufp, strlen(sendbufp));
+	    if(param[V])
 		{
 		fprintf(stderr, "writing over tls...\n");
 		}
 	    if( -1 == (i = tls_write(tlsc, sendbufp, strlen(sendbufp))))
 		{
 		fprintf(stderr, "libtls internal error: ");
 		errstr = (char *) tls_error(tlsc);
 		goto err;
 		}
 	    }
 	else
 	    {
 	    if(param[V])
 		{
 		fprintf(stderr, "writing over socket...\n");
 		}
 	    i = send(sockd, sendbufp, strlen(sendbufp), 0);
 	    }
-	if(param[V])
+	if(param[V] >= 3)
 	    {
 	    fprintf(stderr, "sent: %d bytes\n", i);
 	    }
 	/* actual read loop */
-	int gotheader;
+
 	for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
 	    {
 	    if(NULL == tlsc)
@ -220,7 +248,7 @@ int main(int argc, char **argv)
 		translen = tls_read(tlsc, recvbufp, BUFSIZ);
 		}
-	    if(param[V])
+	    if(param[V] >= 3)
 		{
 		fprintf(stderr, "recv: %d bytes\n", translen);
 		}
@ -241,7 +269,7 @@ int main(int argc, char **argv)
 		    case 200:
 			/* by now we have the first transmission from the server that we actually care about */
 			/* we just need to get to the end of the headers, now that we're done with 'em */
-			if(param[V])
+			if(param[V] >= 3)
 			    {
 			    fprintf(stderr, "200 OKAY, moving to end of header...\n");
 			    }
@ -249,7 +277,11 @@ int main(int argc, char **argv)
 			for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
 			    {
-			    fprintf(stderr, "Searching to end of header...\n");
+			    if(param[V] >= 2)
 				{
 				fprintf(stderr, "Searching to end of header...\n");
 				}
 			    if(NULL != tlsc)
 				{
 				tls_read(tlsc, recvbufp, BUFSIZ);
@ -270,12 +302,14 @@ int main(int argc, char **argv)
 			/* intentional drop through from 301 to 302 */
 		    case 301:
 		    case 302:
-			urip = http_header_extract("ocation", recvbufp);
+			urip = http_get_keyval("Location", recvbufp);
 			if(param[V])
 			    {
-			    fprintf(stderr, "Redirecting to %s%s%s...", uristruct.proto, uristruct.fqdn, uristruct.path);
+			    fprintf(stderr, "Redirecting to %s...\n", urip);
 			    }
 			redirnum++;
 			/* could use continue, but structured programming makes it easier to use goto in this circumstance... */
 			goto start;
 		    case 400:
@ -297,10 +331,8 @@ int main(int argc, char **argv)
 		}
 	    i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
 	    if(param[V] >= 2)
 		fprintf(stdout, "%s", recvbufp);
-	    if(param[V])
+	    if(param[V] >= 3)
 		{
 		fprintf(stderr, "fwrite: %d bytes\n", i);
 		}
@ -308,6 +340,7 @@ int main(int argc, char **argv)
 	    offsetp = recvbufp;
 	    }
 	tls_free(tlsc);
 	close(sockd);
 	fclose(filed);
 	free(sendbufp);
@ -327,3 +360,4 @@ int main(int argc, char **argv)
    fprintf(stderr, "%s: %s\n", argv[0], errstr);
    exit(EXIT_FAILURE);
    }
--- a/src/support.c
+++ b/src/support.c
@ -2,35 +2,34 @@
 /* return a properly formatted request for any implemented protocol */
 char *reqgen(uri *urip)
  {
  char *req;
  int is_tls = 0;
  if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
    {
-    reqgen_http(urip->path, urip->fqdn, &req);
+    char *req;
-    if(!req)
+    if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
-      {
+	{
-      return(NULL);
+	reqgen_http(urip->path, urip->fqdn, &req);
      }
-    return(req);
+	if(!req)
 	    {
 	    return(NULL);
 	    }
 	return(req);
 	}
    else if(!strcmp(urip->proto, "gopher"))
 	{
 	reqgen_gopher(urip->path, &req);
 	if(!req)
 	    {
 	    return(NULL);
 	    }
 	return(req);
 	}
    return(NULL);
    }
  else if(!strcmp(urip->proto, "gopher"))
    {
    reqgen_gopher(urip->path, &req);
    if(!req)
      {
      return(NULL);
      }
    return(req);
    }
  return(NULL);
  }
 /* takes a data buffer and returns an integer corresponding to the server's response value */
 /* if not applicable, return -1 */
@ -46,7 +45,7 @@ int resp_parse(char *data, uri *uristruct)
 	return(-1);
 	}
-	}
+    }
 /* return a pointer to a character array on the heap consisting of all bytes */
 /* between start and end in str. */
@ -59,35 +58,26 @@ char *substr_extract(const char *str, int start, int end)
    substr = NULL;
    /* account for zero index plus the nullterm */
-    if( !(substr = malloc((substr_len + 1))))
+    if( !(substr = calloc((substr_len + 1), sizeof(char))))
 	{
 	return(NULL);
 	}
-  memcpy(substr, str+start, substr_len);
+    memcpy(substr, str+start, substr_len);
-  return(substr);
+    return(substr);
-  }
+    }
-
+
-/* mastrcat -- improved string concat function. returns a pointer to the first element in a buffer containing the strings str1 */
+char *buftolower(char *bufp)
-/* and str2 joined end-to-end. */ 
+    {
-char *mastrcat(char *str1, char *str2)
+    int i;
-    {
+    char *nbufp;
-    unsigned long int nbi, stri, nbsize;
+    nbufp = calloc(strlen(bufp), sizeof(char));
-    char *nbuf;
+    
-    nbi = stri = 0;
+    for(i = 0; '\0' != bufp[i]; i++)
-    nbsize = (strlen(str1) + strlen(str2));
+	{
-    nbuf = malloc(nbsize);
+	nbufp[i] = tolower(bufp[i]);
-
+	}
-    for(stri = 0; str1[stri] != '\0'; nbi++, stri++)
+    
-        {
+    return(nbufp);
        nbuf[nbi] = str1[stri];
        }
    for(stri = 0; str2[stri] != '\0'; nbi++, stri++)
        {
        nbuf[nbi] = str2[stri];
        }
    return nbuf;
    }
--- a/src/support.h
+++ b/src/support.h
@ -37,4 +37,4 @@ typedef struct
 char *reqgen(uri *urip);
 int resp_parse(char *data, uri *uristruct);
 char *substr_extract(const char *str, int start, int end);
-char *mastrcat(char *str1, char *str2);
+char *buftolower(char *bufp);
--- a/src/uri.c
+++ b/src/uri.c
@ -2,14 +2,6 @@
 /* really hate using the preprocessor, but it makes sense in this context   */
 /* SUBSTR_COUNT is number of parenthetical substrings in REGEX_URI plus one */
 /* this regex is from the RFC describing URI syntax -- can't recall the     */
 /* exact one right now. anyway, it's a little too general for my tastes,    */
 /* but the one I came up with was trash (unsurprisingly) so here we are.    */
 /* need to modify this in the future to be less liberal...                  */
 #define REGEX_URI_OLD "^([^:/?#]+)://(([^/?#]+)+([^?#]*))(\\?([^#]*))?(#(.*))?"
 #define REGEX_URI_RFC "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"
 #define REGEX_URI_NEW "^(([^:/?#]+):)?(//([^/?#]+))?([^?#]*)(\\?([^#]*))?(#(.*))?"
 #define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
@ -21,10 +13,8 @@
 int uri_parse(const char *uristr, uri *res)
    {
    int i;
    int regerrcode;
    char validp;
    char *pathp;
    regex_t regexp;
    regmatch_t match[SUBSTR_COUNT+5];
    char errbuf[BUFSIZ] = {0};
@ -49,6 +39,7 @@ int uri_parse(const char *uristr, uri *res)
    /* with experience.                                                    */
    res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
    res->fqdn  = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);
    /* if the difference below is less than 1, our path doesn't exist. */
    /* Compensate by setting it to '/' which will always return a root */
    /* document from an HTTP server -- and, presumably, others. We'll  */
@ -61,16 +52,6 @@ int uri_parse(const char *uristr, uri *res)
 	{
 	/* we only have a simple path */
 	res->path  = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
 	/* /\* we have a more complex path *\/ */
 	/* if(0 != match[PATH+1].rm_so) */
 	/*     { */
 	/*     for(i = PATH; 0 != match[i].rm_so && i <= SUBSTR_COUNT; i++) */
 	/* 	{ */
 	/* 	/\* memory leak here that needs to be addressed *\/ */
 	/* 	res->path = mastrcat(res->path, substr_extract(uristr, match[i].rm_so, match[i].rm_eo)); */
 	/* 	} */
 	/*     } */
 	}