From f9891e284c7893f97d050e15aa2c87d60f445aa4 Mon Sep 17 00:00:00 2001 From: Mid Favila Date: Thu, 24 Nov 2022 04:18:05 -0400 Subject: [PATCH] Initial commit. --- CHANGES | 0 LICENSE | 1 + Makefile | 0 README | 0 TODO | 11 ++++ src/Makefile | 13 ++++ src/connect.c | 42 +++++++++++++ src/connect.h | 11 ++++ src/headers.h | 21 +++++++ src/http.c | 24 ++++++++ src/http.h | 2 + src/main.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/support.c | 48 +++++++++++++++ src/support.h | 39 ++++++++++++ src/test.c | 18 ++++++ src/uri.c | 56 +++++++++++++++++ src/uri.h | 10 ++++ 17 files changed, 459 insertions(+) create mode 100644 CHANGES create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README create mode 100644 TODO create mode 100644 src/Makefile create mode 100644 src/connect.c create mode 100644 src/connect.h create mode 100644 src/headers.h create mode 100644 src/http.c create mode 100644 src/http.h create mode 100644 src/main.c create mode 100644 src/support.c create mode 100644 src/support.h create mode 100644 src/test.c create mode 100644 src/uri.c create mode 100644 src/uri.h diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..e69de29 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3c916bd --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +GPL until I figure out a better option i guess idk diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e69de29 diff --git a/README b/README new file mode 100644 index 0000000..e69de29 diff --git a/TODO b/TODO new file mode 100644 index 0000000..b822499 --- /dev/null +++ b/TODO @@ -0,0 +1,11 @@ +>put file on disk +>get file from remote host +>send properly-formed request to host +>establish TLS connection (if necessary) +>connect the socket +>generate appropriate sockaddr_in structure +>call getaddrinfo +>initialize hints structures +>create socket +>create fd +>parse URI [done] \ No newline at end of file diff --git a/src/Makefile b/src/Makefile new 
file mode 100644
index 0000000..ba511c6
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,13 @@
+.POSIX:
+
+OBJ = \
+	connect.o \
+	http.o \
+	support.o \
+	uri.o
+
+fetch: $(OBJ)
+	$(CC) $(LDFLAGS) $(CFLAGS) -o fetch $(OBJ) main.c $(LIBS)
+
+clean:
+	rm *.o fetch
diff --git a/src/connect.c b/src/connect.c
new file mode 100644
index 0000000..3c94dd2
--- /dev/null
+++ b/src/connect.c
@@ -0,0 +1,57 @@
+#include "headers.h"
+
+/* dial -- resolve fqdn/proto and connect a TCP socket to the first address */
+/* that accepts the connection. */
+/* returns the connected socket descriptor, or 0 on any failure (callers */
+/* test the result with `!`). */
+int dial(const char *fqdn, const char *proto)
+    {
+    int sd;
+    struct addrinfo hints;
+    struct addrinfo *ainfo, *cur;
+
+    /* POSIX demands that all fields of a hints struct are initialized */
+    memset(&hints, 0, sizeof(hints));
+    /* only return IPv6/4 addresses if the system can handle them */
+    hints.ai_flags = AI_ADDRCONFIG;
+    /* accept either IPv4 or IPv6 */
+    hints.ai_family = AF_UNSPEC;
+    /* connecting via TCP stream */
+    hints.ai_socktype = SOCK_STREAM;
+
+    ainfo = NULL;
+    if(getaddrinfo(fqdn, proto, &hints, &ainfo))
+        {
+        return(0);
+        }
+
+    /* try each candidate in turn. the socket is created per candidate */
+    /* because its family has to match the family of the address. */
+    sd = -1;
+    for(cur = ainfo; cur; cur = cur->ai_next)
+        {
+        sd = socket(cur->ai_family, cur->ai_socktype, cur->ai_protocol);
+        if(sd == -1)
+            {
+            continue;
+            }
+
+        if(!connect(sd, cur->ai_addr, cur->ai_addrlen))
+            {
+            break;
+            }
+
+        close(sd);
+        sd = -1;
+        }
+
+    freeaddrinfo(ainfo);
+
+    /* socket() and connect() signal failure with -1, our callers expect 0 */
+    if(sd == -1)
+        {
+        return(0);
+        }
+
+    return(sd);
+    }
diff --git a/src/connect.h b/src/connect.h
new file mode 100644
index 0000000..6e8a00c
--- /dev/null
+++ b/src/connect.h
@@ -0,0 +1,10 @@
+/* dial -- open a TCP/IP connection to fqdn over port proto (or the port that */
+/* corresponds to it, according to /etc/services) */
+/* */
+/* fqdn -- character string containing the FQDN of the host we're connecting to */
+/* proto -- character string containing either a numeric port spec or a human */
+/* readable protocol specification, such as "http" */
+/* return values */
+/* sd -- successful connection returns a socket descriptor connected to the host */
+/* 0 -- couldn't resolve the address or couldn't connect */
+int dial(const char *fqdn, const char *proto);
\ No newline at end of file
diff --git a/src/headers.h b/src/headers.h
new file mode 100644
index 0000000..8556f30
--- /dev/null
+++ b/src/headers.h
@@ -0,0 +1,22 @@
+/* NOTE(review): the system header names on the #include lines below were */
+/* lost when this patch was extracted; they are restored from the library */
+/* calls the sources make -- confirm against the original commit. */
+#include <errno.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <netdb.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+/* routines and variables used globally */
+#include "support.h"
+/* URI handling routines */
+#include "uri.h"
+/* generic TCP/IP networking functions */
+#include "connect.h"
+/* HTTP 1.0 handling */
+#include "http.h"
diff --git a/src/http.c b/src/http.c
new file mode 100644
index 0000000..9cfb696
--- /dev/null
+++ b/src/http.c
@@ -0,0 +1,29 @@
+#include "headers.h"
+
+/* printf-style template for an HTTP/1.0 GET request: path first, then host */
+const char REQ_HTTP[] =
+    {
+    "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n"
+    };
+
+
+/* reqgen_http -- build a GET request for path on host fqdn */
+/* on success *nbuf points at a malloc'd, nul-terminated request which the */
+/* caller must free, and 0 is returned. on allocation failure *nbuf is set */
+/* to NULL and ERRMEM is returned. */
+int reqgen_http(const char *path, const char *fqdn, char **nbuf)
+    {
+    size_t buflen;
+
+    /* the template's two "%s" make this a slight over-estimate */
+    buflen = (strlen(REQ_HTTP) + strlen(path) + strlen(fqdn) + 1);
+
+    if( !(*nbuf = malloc(buflen)))
+        {
+        return(ERRMEM);
+        }
+
+    snprintf(*nbuf, buflen, REQ_HTTP, path, fqdn);
+
+    return(0);
+    }
\ No newline
at end of file
diff --git a/src/http.h b/src/http.h
new file mode 100644
index 0000000..b7e87a1
--- /dev/null
+++ b/src/http.h
@@ -0,0 +1,5 @@
+/* build an HTTP/1.0 GET request; see http.c */
+int reqgen_http(const char *path, const char *fqdn, char **nbuf);
+/* NOTE(review): the two routines below are declared but not defined in this patch */
+int http_content_length(char *data);
+char *http_skip_header(char *data, int recvlen);
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..7113382
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,206 @@
+#include "headers.h"
+
+
+enum
+    {
+    /* display a status bar */
+    B,
+    /* put the file somewhere other than the current directory */
+    O,
+    /* silence all output -- the default */
+    /* note that if -q is passed explicitly, it will irrevocably override -v and -b */
+    Q,
+    /* increase the verbosity level. higher levels imply you want the content lower levels would provide */
+    /* all verbose output is sent to stderr for ease of capture */
+    /* 0: no informational, statistical or debug output */
+    /* 1: informational output (Sending request FOO to host BAR, Received response header BAZ, etc) */
+    /* 2: statistical. informational+size of header and body in bytes, round-trip time, etc */
+    /* 3+: debug. statistical+all internal state changes */
+    V
+    };
+
+
+/* fetch -- download the resource named by a URI into a local file. */
+/* usage: fetch [-bqv] [-o outfile] uri */
+/* exits with EXIT_SUCCESS once the whole response body has been written, */
+/* EXIT_FAILURE otherwise. */
+int main(int argc, char **argv)
+    {
+    /* the blank line that separates an HTTP header from the body */
+    static const char hdr_end[] = "\r\n\r\n";
+    int opt, sockd, recvlen, recv_header, hdr_match, i, status;
+    char recvbuf[BUFSIZ];
+    char param[4] = {0};
+    char *outpath, *sendbufp;
+    FILE *filed;
+    uri uristruct;
+
+    outpath = sendbufp = NULL;
+    filed = NULL;
+    sockd = recvlen = recv_header = hdr_match = i = 0;
+    status = EXIT_FAILURE;
+
+
+    if(argc < 2)
+        {
+        fprintf(stderr, "%s: need args\n", argv[0]);
+        goto done;
+        }
+
+    while((opt = getopt(argc, argv, "bo:qv")) != -1)
+        {
+        switch(opt)
+            {
+            case 'b':
+                /* -q wins over -b no matter the order they come in */
+                if(!param[Q])
+                    {
+                    param[B] = 1;
+                    }
+                break;
+            case 'o':
+                param[O] = 1;
+                outpath = optarg;
+                break;
+            case 'q':
+                param[Q] = 1;
+                param[B] = 0;
+                param[V] = 0;
+                break;
+            case 'v':
+                /* we handle v differently because we want to support different levels of verbosity */
+                if(!param[Q])
+                    {
+                    param[V]++;
+                    }
+                break;
+            default:
+                fprintf(stderr, "usage: %s [-bqv] [-o outfile] uri\n", argv[0]);
+                goto done;
+            }
+        }
+
+    /* getopt leaves optind at the first operand; no operand means no URI */
+    if(optind >= argc)
+        {
+        fprintf(stderr, "%s: need a URI\n", argv[0]);
+        goto done;
+        }
+
+    uristruct.proto = uristruct.fqdn = uristruct.path = NULL;
+    if(uri_parse(argv[optind], &uristruct)
+        || !uristruct.proto || !uristruct.fqdn || !uristruct.path)
+        {
+        fprintf(stderr, "%s: invalid URI.\n", argv[0]);
+        goto done;
+        }
+    if(!strlen(uristruct.path))
+        {
+        uristruct.path = "/";
+        }
+
+    if(!outpath)
+        {
+        /* name the file after the last path component, if there is one */
+        outpath = strrchr(uristruct.path, '/');
+        outpath = outpath ? (outpath + 1) : uristruct.path;
+        if(!strlen(outpath))
+            {
+            outpath = "default";
+            }
+        }
+
+    /* reqgen returns NULL for protocols it doesn't implement */
+    if( !(sendbufp = reqgen(&uristruct)))
+        {
+        fprintf(stderr, "%s: can't build a request for protocol %s.\n", argv[0], uristruct.proto);
+        goto done;
+        }
+
+    /* connect before touching the disk so a dead host doesn't clobber a file */
+    if( !(sockd = dial(uristruct.fqdn, uristruct.proto)))
+        {
+        fprintf(stderr, "%s: couldn't connect to host %s.\n", argv[0], uristruct.fqdn);
+        goto done;
+        }
+
+    if( !(filed = fopen(outpath, "wb")))
+        {
+        fprintf(stderr, "%s: couldn't open %s for writing.\n", argv[0], outpath);
+        goto done;
+        }
+
+    /* send() reports failure as -1, never as 0 */
+    if(send(sockd, sendbufp, strlen(sendbufp), 0) == -1)
+        {
+        fprintf(stderr, "%s: couldn't send the request.\n", argv[0]);
+        goto done;
+        }
+
+    if(param[V] >= 1)
+        {
+        fprintf(stderr, "%s\n", sendbufp);
+        }
+
+    for(;;)
+        {
+        recvlen = (int) recv(sockd, recvbuf, sizeof(recvbuf), 0);
+        if(recvlen == -1)
+            {
+            fprintf(stderr, "%s: receive failed.\n", argv[0]);
+            goto done;
+            }
+        /* 0 means the peer closed the connection: the response is complete */
+        if(recvlen == 0)
+            {
+            break;
+            }
+
+        if(param[V] >= 2)
+            {
+            fprintf(stderr, "%s: received %d bytes...\n", argv[0], recvlen);
+            }
+
+        /* step over header bytes. hdr_match survives between reads, so */
+        /* a terminator that straddles two recv() calls is still found. */
+        for(i = 0; !recv_header && i < recvlen; i++)
+            {
+            if(recvbuf[i] == hdr_end[hdr_match])
+                {
+                hdr_match++;
+                }
+            else
+                {
+                hdr_match = (recvbuf[i] == '\r');
+                }
+
+            if(hdr_match == 4)
+                {
+                recv_header = 1;
+                }
+            }
+
+        /* everything from offset i onwards is body */
+        if(fwrite(recvbuf + i, sizeof(char), recvlen - i, filed) != (size_t) (recvlen - i))
+            {
+            fprintf(stderr, "%s: couldn't write to %s.\n", argv[0], outpath);
+            goto done;
+            }
+        }
+
+    status = EXIT_SUCCESS;
+
+    done:
+    /* fclose flushes, so a failure here means the file is incomplete */
+    if(filed && fclose(filed) == EOF)
+        {
+        status = EXIT_FAILURE;
+        }
+    if(sockd)
+        {
+        close(sockd);
+        }
+    free(sendbufp);
+
+    exit(status);
+    }
diff --git a/src/support.c b/src/support.c
new file mode 100644
index 0000000..842f55c
--- /dev/null
+++ b/src/support.c
@@ -0,0 +1,59 @@
+#include "headers.h"
+
+/* return a properly formatted request for any implemented protocol */
+/* the request is malloc'd (the caller frees it); NULL is returned for an */
+/* unimplemented protocol or when the request couldn't be built. */
+char *reqgen(uri *urip)
+    {
+    char *req;
+
+    req = NULL;
+
+    if(!urip || !urip->proto)
+        {
+        return(NULL);
+        }
+
+    if(!strcmp("http", urip->proto))
+        {
+        /* on failure reqgen_http leaves req NULL, which is our error value too */
+        reqgen_http(urip->path, urip->fqdn, &req);
+        }
+
+    return(req);
+    }
+
+/* report errcode on stderr and terminate the process with it as exit status */
+void throw(int errcode)
+    {
+    fprintf(stderr, "fetch: error %d.\n", errcode);
+    exit(errcode);
+    }
+
+/* return a pointer to a character array on the heap consisting of all bytes */
+/* between start and end in str, followed by a nullterm. the caller frees it. */
+/* returns NULL if the range is invalid or memory couldn't be allocated. */
+char *substr_extract(const char *str, int start, int end)
+    {
+    int substr_len;
+    char *substr;
+
+    if(!str || start < 0 || end < start)
+        {
+        return(NULL);
+        }
+
+    substr_len = (end - start);
+
+    /* one extra byte for the nullterm */
+    if( !(substr = malloc((substr_len + 1))))
+        {
+        return(NULL);
+        }
+
+    memcpy(substr, str+start, substr_len);
+    /* malloc hands back uninitialized memory, so terminate explicitly */
+    substr[substr_len] = '\0';
+
+    return(substr);
+    }
\ No newline at end of file
diff --git a/src/support.h b/src/support.h
new file mode 100644
index 0000000..acd007e
--- /dev/null
+++ b/src/support.h
@@ -0,0 +1,39 @@
+/* return codes */
+enum
+    {
+    /* failed to allocate or otherwise access memory */
+    ERRMEM = -20,
+
+    /* URI appears to be malformed */
+    ERRMALF,
+
+    /* couldn't create a socket */
+    ERRSOCK,
+
+    /* addrinfo couldn't init */
+    ERRADDR,
+
+    /* couldn't connect */
+    ERRCONN,
+
+    ERRUSAGE,
+
+    /* */
+
+    /* no error, everything is okay */
+    ERROKAY = 0
+    };
+
+
+typedef struct
+    {
+    char *proto;
+    char *fqdn;
+    char *path;
+    } uri;
+
+
+
+char *reqgen(uri *urip);
+void throw(int errcode);
+char *substr_extract(const char *str, int start,
int end);
diff --git a/src/test.c b/src/test.c
new file mode 100644
index 0000000..b67fe9f
--- /dev/null
+++ b/src/test.c
@@ -0,0 +1,28 @@
+#include "headers.h"
+
+/* scratch driver: parse argv[1] as a URI, print its parts and the request */
+int main(int argc, char **argv)
+{
+    char *req;
+    uri uris;
+    int isntvalid;
+
+    if(argc < 2)
+    {
+        printf("usage: %s uri\n", argv[0]);
+        exit(1);
+    }
+
+    isntvalid = uri_parse(argv[1], &uris);
+    if(isntvalid)
+    {
+        printf("invalid uri\n");
+        exit(1);
+    }
+
+    req = reqgen(&uris);
+    /* reqgen returns NULL for protocols it can't handle; don't hand that to %s */
+    printf("valid: %d\nproto: %s\nfqdn: %s\npath: %s\nreq: %s\n", isntvalid, uris.proto, uris.fqdn, uris.path, req ? req : "(none)");
+
+    return(0);
+}
diff --git a/src/uri.c b/src/uri.c
new file mode 100644
index 0000000..c1094ca
--- /dev/null
+++ b/src/uri.c
@@ -0,0 +1,67 @@
+#include "headers.h"
+
+
+/* really hate using the preprocessor, but it makes sense in this context */
+/* SUBSTR_COUNT is number of parenthetical substrings in REGEX_URI plus one */
+/* this regex is from the RFC describing URI syntax (presumably RFC 3986, */
+/* appendix B -- confirm). anyway, it's a little too general for my tastes, */
+/* but the one I came up with was trash (unsurprisingly) so here we are. */
+/* need to modify this in the future to be less liberal... */
+#define REGEX_URI "^([^:/?#]+)://(([^/?#]*)+([^?#]*))(\\?([^#]*))?(#(.*))?"
+
+
+#define SUBSTR_COUNT 9
+#define PROTO 1
+#define FQDN 3
+#define PATH 4
+
+
+/* uri_parse -- split uristr into protocol, host and path */
+/* on success the three fields of res point at malloc'd strings owned by */
+/* the caller and 0 is returned. returns ERRMALF if uristr doesn't look */
+/* like a URI, ERRMEM if the pattern can't be compiled or a string can't */
+/* be allocated. */
+int uri_parse(const char *uristr, uri *res)
+    {
+    regex_t regexp;
+    regmatch_t match[SUBSTR_COUNT];
+
+    if(!uristr || !res)
+        {
+        return(ERRMALF);
+        }
+
+    if(regcomp(&regexp, REGEX_URI, REG_EXTENDED))
+        {
+        return(ERRMEM);
+        }
+
+    if(regexec(&regexp, uristr, SUBSTR_COUNT, match, 0))
+        {
+        regfree(&regexp);
+        return(ERRMALF);
+        }
+    /* match only holds offsets, so the compiled pattern can go now */
+    regfree(&regexp);
+
+    /* a path that is one character long can only be a lone '/', which */
+    /* is handled the same as no path at all. */
+    if((match[PATH].rm_eo - match[PATH].rm_so) == 1)
+        {
+        match[PATH].rm_eo = match[PATH].rm_so;
+        }
+
+    res->proto = substr_extract(uristr, match[PROTO].rm_so, match[PROTO].rm_eo);
+    res->fqdn = substr_extract(uristr, match[FQDN].rm_so, match[FQDN].rm_eo);
+    res->path = substr_extract(uristr, match[PATH].rm_so, match[PATH].rm_eo);
+
+    if(!res->proto || !res->fqdn || !res->path)
+        {
+        free(res->proto);
+        free(res->fqdn);
+        free(res->path);
+        res->proto = res->fqdn = res->path = NULL;
+        return(ERRMEM);
+        }
+
+    return(0);
+    }
diff --git a/src/uri.h b/src/uri.h
new file mode 100644
index 0000000..6d342c6
--- /dev/null
+++ b/src/uri.h
@@ -0,0 +1,10 @@
+/* uri_parse -- split a URI into its components */
+/* uri_str -- the URI to split */
+/* res -- receives the malloc'd proto, fqdn and path strings */
+/* return values */
+/* 0/ERROKAY: success */
+/* ERRMALF: malformed URI */
+/* ERRMEM: memory failure */
+/* validation is done with the regex in uri.c, which is known to be more */
+/* liberal than it should be. it *does* split URIs fine, though. */
+int uri_parse(const char *uri_str, uri *res);