See CHANGES.
This commit is contained in:
parent
b5e1a30092
commit
b35d49cc28
6
CHANGES
6
CHANGES
|
@ -1,3 +1,9 @@
|
||||||
|
2023/02/19 - v0.3.2
|
||||||
|
-Improved URI parsing again
|
||||||
|
-Improved HTTP redirect handling capability
|
||||||
|
-Moved TLS out of dial and into main
|
||||||
|
-Improved debugging experience by shuffling around if(param[V])s
|
||||||
|
|
||||||
2022/12/13 - v0.3.1b
|
2022/12/13 - v0.3.1b
|
||||||
-Fixed Gopher support
|
-Fixed Gopher support
|
||||||
-Improved URI parsing
|
-Improved URI parsing
|
||||||
|
|
4
README
4
README
|
@ -20,7 +20,7 @@ Right now, it retrieves data over HTTP or Gopher, and can support TLS,
|
||||||
at least when it comes to HTTPS. I'd like a more general approach to
|
at least when it comes to HTTPS. I'd like a more general approach to
|
||||||
TLS support in the future.
|
TLS support in the future.
|
||||||
|
|
||||||
Its source code is small (< 400 lines of C total according to cloc right
|
Its source code is small (< 500 lines of C total according to cloc right
|
||||||
now) and uses only POSIX routines -- there's no temptation to use GNU
|
now) and uses only POSIX routines -- there's no temptation to use GNU
|
||||||
or BSD features because I develop against musl and tcc, and I don't even
|
or BSD features because I develop against musl and tcc, and I don't even
|
||||||
have the Linux manpages on any of my systems.
|
have the Linux manpages on any of my systems.
|
||||||
|
@ -36,7 +36,7 @@ Apportate also aims to have actually useful diagnostics; that is,
|
||||||
compared to other tools, apportate aims to only provide useful error
|
compared to other tools, apportate aims to only provide useful error
|
||||||
output. In the case of success, it follows the Rule of Silence; on
|
output. In the case of success, it follows the Rule of Silence; on
|
||||||
unrecoverable errors, it aborts immediately. It supports multiple levels
|
unrecoverable errors, it aborts immediately. It supports multiple levels
|
||||||
of verbosity, and exposes almost all of its internals.
|
of verbosity, and exposes almost all of its internal operations for debugging.
|
||||||
|
|
||||||
Its simple design and use should also make it relatively convenient
|
Its simple design and use should also make it relatively convenient
|
||||||
for inclusion in shell scripts.
|
for inclusion in shell scripts.
|
||||||
|
|
|
@ -1,19 +1,18 @@
|
||||||
#include "headers.h"
|
#include "headers.h"
|
||||||
|
|
||||||
|
|
||||||
int dial(const char *fqdn, const char *proto, struct tls **tlsres)
|
int dial(const char *fqdn, const char *proto)
|
||||||
{
|
{
|
||||||
int sd;
|
int sd;
|
||||||
struct addrinfo *ainfo;
|
struct addrinfo *ainfo;
|
||||||
struct tls_config *tlshints;
|
|
||||||
|
|
||||||
|
|
||||||
if(getaddrinfo(fqdn, proto, 0, &ainfo))
|
if( !(sd = socket(AF_INET, SOCK_STREAM, 0)))
|
||||||
{
|
{
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !(sd = socket(ainfo->ai_family, SOCK_STREAM, 0)))
|
if(getaddrinfo(fqdn, proto, 0, &ainfo))
|
||||||
{
|
{
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
@ -23,34 +22,5 @@ int dial(const char *fqdn, const char *proto, struct tls **tlsres)
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(tlsres != 0)
|
|
||||||
{
|
|
||||||
close(sd);
|
|
||||||
|
|
||||||
if( 0 == (*tlsres = tls_client()))
|
|
||||||
{
|
|
||||||
goto err_ssl;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( 0 == (tlshints = tls_config_new()))
|
|
||||||
{
|
|
||||||
goto err_ssl;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(tls_configure(*tlsres, tlshints))
|
|
||||||
{
|
|
||||||
goto err_ssl;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if( (tls_connect(*tlsres, fqdn, proto)))
|
|
||||||
{
|
|
||||||
goto err_ssl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return(sd);
|
return(sd);
|
||||||
|
|
||||||
err_ssl:
|
|
||||||
return(0);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,4 +8,4 @@
|
||||||
/* sd -- successful connection returns a file descriptor connected to the fqdn */
|
/* sd -- successful connection returns a file descriptor connected to the fqdn */
|
||||||
/* ERRCONN -- couldn't connect */
|
/* ERRCONN -- couldn't connect */
|
||||||
/* ERRADDR -- couldn't get addrinfo */
|
/* ERRADDR -- couldn't get addrinfo */
|
||||||
int dial(const char *fqdn, const char *proto, struct tls **tls_res);
|
int dial(const char *fqdn, const char *proto);
|
||||||
|
|
83
src/http.c
83
src/http.c
|
@ -1,27 +1,27 @@
|
||||||
#include "headers.h"
|
#include "headers.h"
|
||||||
|
|
||||||
const char REQ_HTTP[] =
|
const char REQ_HTTP[] =
|
||||||
{
|
{
|
||||||
"GET %s HTTP/1.0\r\nHost: %s\r\n\r\n"
|
"GET %s HTTP/1.0\r\nHost: %s\r\n\r\n"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
int reqgen_http(const char *path, const char *fqdn, char **nbuf)
|
int reqgen_http(const char *path, const char *fqdn, char **nbuf)
|
||||||
{
|
{
|
||||||
int buflen;
|
int buflen;
|
||||||
|
|
||||||
buflen = (strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1);
|
buflen = (strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1);
|
||||||
|
|
||||||
if( !(*nbuf = calloc(buflen, sizeof(char))))
|
if( !(*nbuf = calloc(buflen, sizeof(char))))
|
||||||
{
|
{
|
||||||
return(ERRMEM);
|
return(ERRMEM);
|
||||||
}
|
}
|
||||||
|
|
||||||
sprintf(*nbuf, REQ_HTTP, path, fqdn);
|
sprintf(*nbuf, REQ_HTTP, path, fqdn);
|
||||||
|
|
||||||
|
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int resp_parse_http(char *data)
|
int resp_parse_http(char *data)
|
||||||
{
|
{
|
||||||
|
@ -41,32 +41,51 @@ int resp_parse_http(char *data)
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *http_header_extract(char *key, char *data)
|
/* http_get_keyval - return the value corresponding to a key if it exists in an http response, otherwise null */
|
||||||
|
/* key - key to extract corresponding value */
|
||||||
|
/* data - response header from which to extract key's value */
|
||||||
|
char *http_get_keyval(char *key, char *data)
|
||||||
{
|
{
|
||||||
char *keyp, *keyp_end;
|
char *buf;
|
||||||
char *returnp;
|
/* data indices */
|
||||||
|
char *d_ind, *d_ind2;
|
||||||
|
|
||||||
if( NULL == (keyp = strstr(data, key)))
|
buf = NULL;
|
||||||
{
|
d_ind = d_ind2 = data;
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for(; 0 != *keyp && *keyp != ':'; keyp++);
|
|
||||||
for(; 0 != *keyp && !isalnum(*keyp); keyp++);
|
|
||||||
for(keyp_end = keyp; 0 != *keyp_end && '\r' != *keyp_end; keyp_end++);
|
|
||||||
|
|
||||||
if( NULL == (returnp = calloc((int) (keyp_end - keyp), sizeof(char))))
|
|
||||||
|
/* we ensure that our key and each key we compare to are lower-case because some */
|
||||||
|
/* servers will return mixed-case keys and others single-case. by doing this we */
|
||||||
|
/* can use full key specifiers, which avoids false matches that would occur as a */
|
||||||
|
/* result of using partial key specifiers e.g "location" instead of "ocation". */
|
||||||
|
key = buftolower(key);
|
||||||
|
|
||||||
|
|
||||||
|
for(;*data != '\0'; data++)
|
||||||
|
{
|
||||||
|
if(NULL != buf)
|
||||||
{
|
{
|
||||||
goto err;
|
free(buf);
|
||||||
|
buf = NULL;
|
||||||
|
|
||||||
|
for(; '\n' != *(data - 1); data++);
|
||||||
|
d_ind = d_ind2 = data;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(returnp, keyp, (keyp_end - keyp));
|
for(; *d_ind != ':'; d_ind++);
|
||||||
|
buf = substr_extract(data, 0, (d_ind - data));
|
||||||
|
|
||||||
|
buf = buftolower(buf);
|
||||||
|
|
||||||
|
if(!strcmp(key, buf))
|
||||||
|
{
|
||||||
|
free(buf);
|
||||||
|
for(data = d_ind+2; *(1+d_ind) != '\n'; d_ind++);
|
||||||
|
|
||||||
|
buf = substr_extract(data, 0, (d_ind - data));
|
||||||
|
return(buf);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
return(returnp);
|
|
||||||
|
|
||||||
err:
|
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,4 +16,4 @@ int resp_parse_http(char *data);
|
||||||
exit();
|
exit();
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
char *http_header_extract(char *key, char *data);
|
char *http_get_keyval(char *key, char *data);
|
||||||
|
|
118
src/main.c
118
src/main.c
|
@ -36,7 +36,7 @@ char *outpath;
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
int i, translen, redirnum;
|
int i, translen, redirnum, gotheader;
|
||||||
int sockd;
|
int sockd;
|
||||||
char *recvbufp;
|
char *recvbufp;
|
||||||
char *sendbufp, *offsetp, *errstr;
|
char *sendbufp, *offsetp, *errstr;
|
||||||
|
@ -60,6 +60,7 @@ int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
goto usage;
|
goto usage;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
|
for(i = 0; (i = getopt(argc, argv, "bo:qv")) != -1; i = 0)
|
||||||
{
|
{
|
||||||
switch(i)
|
switch(i)
|
||||||
|
@ -80,7 +81,7 @@ int main(int argc, char **argv)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'v':
|
case 'v':
|
||||||
/* we handle v differently because we want to support different levels of verbosity */
|
/* we handle v differently because we want to support different levels of verbosity */
|
||||||
if(!param[Q])
|
if(!param[Q])
|
||||||
{
|
{
|
||||||
param[V]++;
|
param[V]++;
|
||||||
|
@ -88,7 +89,7 @@ int main(int argc, char **argv)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
goto usage;
|
goto usage;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
urip = argv[optind];
|
urip = argv[optind];
|
||||||
|
@ -125,26 +126,37 @@ int main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* todo: init uristruct as well */
|
/* todo: init uristruct as well */
|
||||||
|
|
||||||
|
uristruct.proto = NULL;
|
||||||
|
uristruct.fqdn = NULL;
|
||||||
|
uristruct.path = NULL;
|
||||||
|
|
||||||
if( uri_parse(urip, &uristruct))
|
if( uri_parse(urip, &uristruct))
|
||||||
{
|
{
|
||||||
errstr = "couldn't parse URI.";
|
errstr = "couldn't parse URI.";
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param[V])
|
if(param[V] >= 2)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
|
fprintf(stderr, "URI parsed, results follow...\nProtocol: %s\nFQDN: %s\nPath: %s\n", uristruct.proto, uristruct.fqdn, uristruct.path);
|
||||||
|
|
||||||
|
if(param[V] >= 3)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "length of proto: %lu\nlength of fqdn: %lu\nlength of path: %lu\n", strlen(uristruct.proto), strlen(uristruct.fqdn), strlen(uristruct.path));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sendbufp = reqgen(&uristruct);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* we should probably modify uri_parse to return a zero value on failure... */
|
/* we should probably modify uri_parse to return a zero value on failure... */
|
||||||
/* make errstr display the URI -- need mastrcat? */
|
|
||||||
if( NULL == (sendbufp = reqgen(&uristruct)))
|
if( NULL == (sendbufp = reqgen(&uristruct)))
|
||||||
{
|
{
|
||||||
errstr = "couldn't generate request. Unknown protocol?";
|
fprintf(stderr, "couldn't generate request. Unknown protocol: %s?\n", uristruct.proto);
|
||||||
goto err;
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if outpath isn't set because we haven't received a -o param, */
|
/* if outpath isn't set because we haven't received a -o param, */
|
||||||
|
@ -158,57 +170,73 @@ int main(int argc, char **argv)
|
||||||
outpath = "default";
|
outpath = "default";
|
||||||
}
|
}
|
||||||
|
|
||||||
/* todo: handle TLS a little better. use a global variable, move TLS handling out of dial? */
|
|
||||||
if(param[V])
|
if(param[V])
|
||||||
{
|
{
|
||||||
fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
|
fprintf(stderr, "connecting to %s using protocol %s...\n", uristruct.fqdn, uristruct.proto);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* definitely going to break tls out of dial, this is really bad */
|
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
|
||||||
if(!strcmp("https", uristruct.proto))
|
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto)))
|
||||||
{
|
{
|
||||||
if(!dial(uristruct.fqdn, uristruct.proto, &tlsc))
|
errstr = "failed to connect";
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* todo: upgrade this to a more general mechanism */
|
||||||
|
if(!strncmp("https", uristruct.proto, 5))
|
||||||
|
{
|
||||||
|
if(NULL != tlsc)
|
||||||
{
|
{
|
||||||
errstr = "failed to connect";
|
tls_reset(tlsc);
|
||||||
goto err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param[V])
|
struct tls_config *config = tls_config_new();
|
||||||
|
tlsc = tls_client();
|
||||||
|
tls_configure(tlsc, config);
|
||||||
|
|
||||||
|
if(-1 == tls_connect_socket(tlsc, sockd, uristruct.fqdn))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "setting up TLS...\n");
|
errstr = "failed to upgrade connection to use TLS, aborting\n";
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* once again, I'll repeat myself -- while a system *COULD* return 0, 1, or 2, it *SHOULD NEVER DO SO* in a sane environment */
|
|
||||||
if( 2 >= (sockd = dial(uristruct.fqdn, uristruct.proto, NULL)))
|
|
||||||
{
|
|
||||||
errstr = "failed to connect";
|
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param[V])
|
if(param[V] >= 2)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Sending request...\n %s\n", sendbufp);
|
fprintf(stderr, "Sending request...\n-----REQUEST START-----\n%s\n-----REQUEST END-----\n", sendbufp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(NULL != tlsc)
|
if(NULL != tlsc)
|
||||||
{
|
{
|
||||||
i = tls_write(tlsc, sendbufp, strlen(sendbufp));
|
if(param[V])
|
||||||
|
{
|
||||||
|
fprintf(stderr, "writing over tls...\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if( -1 == (i = tls_write(tlsc, sendbufp, strlen(sendbufp))))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "libtls internal error: ");
|
||||||
|
errstr = (char *) tls_error(tlsc);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if(param[V])
|
||||||
|
{
|
||||||
|
fprintf(stderr, "writing over socket...\n");
|
||||||
|
}
|
||||||
|
|
||||||
i = send(sockd, sendbufp, strlen(sendbufp), 0);
|
i = send(sockd, sendbufp, strlen(sendbufp), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param[V])
|
if(param[V] >= 3)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "sent: %d bytes\n", i);
|
fprintf(stderr, "sent: %d bytes\n", i);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* actual read loop */
|
/* actual read loop */
|
||||||
int gotheader;
|
|
||||||
for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
|
for(gotheader = 0, translen = 1; translen; memset(recvbufp, 0, BUFSIZ+1))
|
||||||
{
|
{
|
||||||
if(NULL == tlsc)
|
if(NULL == tlsc)
|
||||||
|
@ -220,7 +248,7 @@ int main(int argc, char **argv)
|
||||||
translen = tls_read(tlsc, recvbufp, BUFSIZ);
|
translen = tls_read(tlsc, recvbufp, BUFSIZ);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param[V])
|
if(param[V] >= 3)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "recv: %d bytes\n", translen);
|
fprintf(stderr, "recv: %d bytes\n", translen);
|
||||||
}
|
}
|
||||||
|
@ -241,7 +269,7 @@ int main(int argc, char **argv)
|
||||||
case 200:
|
case 200:
|
||||||
/* by now we have the first transmission from the server that we actually care about */
|
/* by now we have the first transmission from the server that we actually care about */
|
||||||
/* we just need to get to the end of the headers, now that we're done with 'em */
|
/* we just need to get to the end of the headers, now that we're done with 'em */
|
||||||
if(param[V])
|
if(param[V] >= 3)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "200 OKAY, moving to end of header...\n");
|
fprintf(stderr, "200 OKAY, moving to end of header...\n");
|
||||||
}
|
}
|
||||||
|
@ -249,7 +277,11 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
|
for(; NULL == (offsetp = strstr(recvbufp, "\r\n\r\n"));)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Searching to end of header...\n");
|
if(param[V] >= 2)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Searching to end of header...\n");
|
||||||
|
}
|
||||||
|
|
||||||
if(NULL != tlsc)
|
if(NULL != tlsc)
|
||||||
{
|
{
|
||||||
tls_read(tlsc, recvbufp, BUFSIZ);
|
tls_read(tlsc, recvbufp, BUFSIZ);
|
||||||
|
@ -270,12 +302,14 @@ int main(int argc, char **argv)
|
||||||
/* intentional drop through from 301 to 302 */
|
/* intentional drop through from 301 to 302 */
|
||||||
case 301:
|
case 301:
|
||||||
case 302:
|
case 302:
|
||||||
urip = http_header_extract("ocation", recvbufp);
|
urip = http_get_keyval("Location", recvbufp);
|
||||||
if(param[V])
|
if(param[V])
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Redirecting to %s%s%s...", uristruct.proto, uristruct.fqdn, uristruct.path);
|
fprintf(stderr, "Redirecting to %s...\n", urip);
|
||||||
}
|
}
|
||||||
|
|
||||||
redirnum++;
|
redirnum++;
|
||||||
|
/* could use continue, but structured programming makes it easier to use goto in this circumstance... */
|
||||||
goto start;
|
goto start;
|
||||||
|
|
||||||
case 400:
|
case 400:
|
||||||
|
@ -297,10 +331,8 @@ int main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
|
i = fwrite(offsetp, sizeof(char), translen - (offsetp - recvbufp), filed);
|
||||||
if(param[V] >= 2)
|
|
||||||
fprintf(stdout, "%s", recvbufp);
|
|
||||||
|
|
||||||
if(param[V])
|
if(param[V] >= 3)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "fwrite: %d bytes\n", i);
|
fprintf(stderr, "fwrite: %d bytes\n", i);
|
||||||
}
|
}
|
||||||
|
@ -308,6 +340,7 @@ int main(int argc, char **argv)
|
||||||
offsetp = recvbufp;
|
offsetp = recvbufp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tls_free(tlsc);
|
||||||
close(sockd);
|
close(sockd);
|
||||||
fclose(filed);
|
fclose(filed);
|
||||||
free(sendbufp);
|
free(sendbufp);
|
||||||
|
@ -327,3 +360,4 @@ int main(int argc, char **argv)
|
||||||
fprintf(stderr, "%s: %s\n", argv[0], errstr);
|
fprintf(stderr, "%s: %s\n", argv[0], errstr);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,35 +2,34 @@
|
||||||
|
|
||||||
/* return a properly formatted request for any implemented protocol */
|
/* return a properly formatted request for any implemented protocol */
|
||||||
char *reqgen(uri *urip)
|
char *reqgen(uri *urip)
|
||||||
{
|
|
||||||
char *req;
|
|
||||||
int is_tls = 0;
|
|
||||||
|
|
||||||
if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
|
|
||||||
{
|
{
|
||||||
reqgen_http(urip->path, urip->fqdn, &req);
|
char *req;
|
||||||
|
|
||||||
if(!req)
|
if(!strcmp("http", urip->proto) || !strcmp("https", urip->proto))
|
||||||
{
|
{
|
||||||
return(NULL);
|
reqgen_http(urip->path, urip->fqdn, &req);
|
||||||
}
|
|
||||||
|
|
||||||
return(req);
|
if(!req)
|
||||||
|
{
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(req);
|
||||||
|
}
|
||||||
|
else if(!strcmp(urip->proto, "gopher"))
|
||||||
|
{
|
||||||
|
reqgen_gopher(urip->path, &req);
|
||||||
|
|
||||||
|
if(!req)
|
||||||
|
{
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(req);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(NULL);
|
||||||
}
|
}
|
||||||
else if(!strcmp(urip->proto, "gopher"))
|
|
||||||
{
|
|
||||||
reqgen_gopher(urip->path, &req);
|
|
||||||
|
|
||||||
if(!req)
|
|
||||||
{
|
|
||||||
return(NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return(req);
|
|
||||||
}
|
|
||||||
|
|
||||||
return(NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* takes a data buffer and returns an integer corresponding to the server's response value */
|
/* takes a data buffer and returns an integer corresponding to the server's response value */
|
||||||
/* if not applicable, return -1 */
|
/* if not applicable, return -1 */
|
||||||
|
@ -46,7 +45,7 @@ int resp_parse(char *data, uri *uristruct)
|
||||||
return(-1);
|
return(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return a pointer to a character array on the heap consisting of all bytes */
|
/* return a pointer to a character array on the heap consisting of all bytes */
|
||||||
/* between start and end in str. */
|
/* between start and end in str. */
|
||||||
|
@ -59,35 +58,26 @@ char *substr_extract(const char *str, int start, int end)
|
||||||
substr = NULL;
|
substr = NULL;
|
||||||
|
|
||||||
/* account for zero index plus the nullterm */
|
/* account for zero index plus the nullterm */
|
||||||
if( !(substr = malloc((substr_len + 1))))
|
if( !(substr = calloc((substr_len + 1), sizeof(char))))
|
||||||
{
|
{
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(substr, str+start, substr_len);
|
memcpy(substr, str+start, substr_len);
|
||||||
|
|
||||||
return(substr);
|
return(substr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* mastrcat -- improved string concat function. returns a pointer to the first element in a buffer containing the strings str1 */
|
char *buftolower(char *bufp)
|
||||||
/* and str2 joined end-to-end. */
|
{
|
||||||
char *mastrcat(char *str1, char *str2)
|
int i;
|
||||||
{
|
char *nbufp;
|
||||||
unsigned long int nbi, stri, nbsize;
|
nbufp = calloc(strlen(bufp), sizeof(char));
|
||||||
char *nbuf;
|
|
||||||
nbi = stri = 0;
|
for(i = 0; '\0' != bufp[i]; i++)
|
||||||
nbsize = (strlen(str1) + strlen(str2));
|
{
|
||||||
nbuf = malloc(nbsize);
|
nbufp[i] = tolower(bufp[i]);
|
||||||
|
}
|
||||||
for(stri = 0; str1[stri] != '\0'; nbi++, stri++)
|
|
||||||
{
|
return(nbufp);
|
||||||
nbuf[nbi] = str1[stri];
|
|
||||||
}
|
|
||||||
|
|
||||||
for(stri = 0; str2[stri] != '\0'; nbi++, stri++)
|
|
||||||
{
|
|
||||||
nbuf[nbi] = str2[stri];
|
|
||||||
}
|
|
||||||
|
|
||||||
return nbuf;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,4 +37,4 @@ typedef struct
|
||||||
char *reqgen(uri *urip);
|
char *reqgen(uri *urip);
|
||||||
int resp_parse(char *data, uri *uristruct);
|
int resp_parse(char *data, uri *uristruct);
|
||||||
char *substr_extract(const char *str, int start, int end);
|
char *substr_extract(const char *str, int start, int end);
|
||||||
char *mastrcat(char *str1, char *str2);
|
char *buftolower(char *bufp);
|
||||||
|
|
21
src/uri.c
21
src/uri.c
|
@ -2,14 +2,6 @@
|
||||||
|
|
||||||
|
|
||||||
/* really hate using the preprocessor, but it makes sense in this context */
|
/* really hate using the preprocessor, but it makes sense in this context */
|
||||||
/* SUBSTR_COUNT is number of parenthetical substrings in REGEX_URI plus one */
|
|
||||||
/* this regex is from the RFC describing URI syntax -- can't recall the */
|
|
||||||
/* exact one right now. anyway, it's a little too general for my tastes, */
|
|
||||||
/* but the one I came up with was trash (unsurprisingly) so here we are. */
|
|
||||||
/* need to modify this in the future to be less liberal... */
|
|
||||||
#define REGEX_URI_OLD "^([^:/?#]+)://(([^/?#]+)+([^?#]*))(\\?([^#]*))?(#(.*))?"
|
|
||||||
#define REGEX_URI_RFC "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"
|
|
||||||
#define REGEX_URI_NEW "^(([^:/?#]+):)?(//([^/?#]+))?([^?#]*)(\\?([^#]*))?(#(.*))?"
|
|
||||||
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
|
#define REGEX_URI "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
|
||||||
|
|
||||||
|
|
||||||
|
@ -21,10 +13,8 @@
|
||||||
|
|
||||||
int uri_parse(const char *uristr, uri *res)
|
int uri_parse(const char *uristr, uri *res)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
int regerrcode;
|
int regerrcode;
|
||||||
char validp;
|
char validp;
|
||||||
char *pathp;
|
|
||||||
regex_t regexp;
|
regex_t regexp;
|
||||||
regmatch_t match[SUBSTR_COUNT+5];
|
regmatch_t match[SUBSTR_COUNT+5];
|
||||||
char errbuf[BUFSIZ] = {0};
|
char errbuf[BUFSIZ] = {0};
|
||||||
|
@ -49,6 +39,7 @@ int uri_parse(const char *uristr, uri *res)
|
||||||
/* with experience. */
|
/* with experience. */
|
||||||
res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
|
res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
|
||||||
res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);
|
res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);
|
||||||
|
|
||||||
/* if the difference below is less than 1, our path doesn't exist. */
|
/* if the difference below is less than 1, our path doesn't exist. */
|
||||||
/* Compensate by setting it to '/' which will always return a root */
|
/* Compensate by setting it to '/' which will always return a root */
|
||||||
/* document from an HTTP server -- and, presumably, others. We'll */
|
/* document from an HTTP server -- and, presumably, others. We'll */
|
||||||
|
@ -61,16 +52,6 @@ int uri_parse(const char *uristr, uri *res)
|
||||||
{
|
{
|
||||||
/* we only have a simple path */
|
/* we only have a simple path */
|
||||||
res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
|
res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
|
||||||
/* /\* we have a more complex path *\/ */
|
|
||||||
/* if(0 != match[PATH+1].rm_so) */
|
|
||||||
/* { */
|
|
||||||
/* for(i = PATH; 0 != match[i].rm_so && i <= SUBSTR_COUNT; i++) */
|
|
||||||
/* { */
|
|
||||||
/* /\* memory leak here that needs to be addressed *\/ */
|
|
||||||
/* res->path = mastrcat(res->path, substr_extract(uristr, match[i].rm_so, match[i].rm_eo)); */
|
|
||||||
/* } */
|
|
||||||
/* } */
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue