commit 4a75466f85af5057f3274b4404c5387abc013252 Author: Neil Date: Sat Jun 12 18:09:56 2021 -0700 Under version-control. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..85f81db --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +BDIR := bin + +PROJ := FindPatterns +FILES := findpat +OBJS := $(patsubst %,$(BDIR)/%.o,$(FILES)) +SRCS := $(patsubst %,%.c,$(FILES)) +H := $(patsubst %,%.h,$(FILES)) + +CC := gcc +OF := -Wall -O3 -fasm -fomit-frame-pointer -ffast-math -funroll-loops -fasm -fomit-frame-pointer -ffast-math -funroll-loops -pedantic -ansi +CF := -Wall -O3 -fasm -fomit-frame-pointer -ffast-math -funroll-loops -fasm -fomit-frame-pointer -ffast-math -funroll-loops -pedantic -ansi + +default: $(BDIR)/$(PROJ) + +$(BDIR)/$(PROJ): $(OBJS) + $(CC) $(OF) $(CF) $^ -o $@ + +$(BDIR)/%.o: %.c + $(CC) $(OF) -c $< -o $@ + +.PHONY: clean +clean: + -rm $(OBJS) diff --git a/content.d b/content.d new file mode 100644 index 0000000..aec986f --- /dev/null +++ b/content.d @@ -0,0 +1,34 @@ +This came in 2006 after attending a talk on bioinformatics. +I had the idea of making an email client that would take the +methods of bioinformatics and apply them to spam-detection. +

+

+Searches through input and outputs sequences that are repeated. +Because it's intended for text files, control characters are +ignored. +

+

+FindPatterns [filename] [-b] [-e] [-i] [-n] [-o] [-s] [-v] [-m<n>] [-l<n>] [-g<n>] [-?|h] +

+
+
filename
Read input from this file; if no filename is given, read from stdin.
+
-b
Keep a buffer to count repeated matches (always enabled unless -o is given.)
+
-e
Echo input.
+
-i
Case-insensitive (not implemented.)
+
-n
Don't display matches at the end.
+
-o
Output matches immediately as they are found.
+
-s
Silent mode - plain output with no extra characters.
+
-v
Verbose comments while outputting.
+
-g<n>
Set memory buffer granularity to the largest power of two +not exceeding <n> bytes (default 1024.)
+
-l<n>
Set match limit to <n> matches (default 4096; 0 -> no limit.)
+
-m<n>
Set minimum match length to <n> symbols (default 3).
+
-?|h
Display this help screen and exit.
+
+

+Adding -<s>- will turn off switch <s>. +

+

+Also included is a simple KillSpam email client that takes the patterns +generated (from FindPatterns) and eliminates all the emails that have +matching patterns. diff --git a/copying.txt b/copying.txt new file mode 100644 index 0000000..1d22c0c --- /dev/null +++ b/copying.txt @@ -0,0 +1,15 @@ +Copyright (C) 2006 Neil Edelman + +Find Patterns is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Find Patterns (see gnu.txt.) If not, see +. diff --git a/findpat.c b/findpat.c new file mode 100644 index 0000000..80af170 --- /dev/null +++ b/findpat.c @@ -0,0 +1,297 @@ +/* Copyright 2006 Neil Edelman, distributed under the terms of the + GNU General Public License, see copying.txt */ + +/* Sunday, April 2, 2006 */ + +#include +#include +#include + +/* constants */ +static const char *programme = "Find Patterns"; +static const char *year = "2006"; +static const int versionMajor = 1; +static const int versionMinor = 0; + +typedef struct TAG_match_t { + unsigned long next; + unsigned long pos; + unsigned long len; + unsigned long count; +} match_t; + +int main(int argc, char **argv); +void set_binary_switch_string(int argc, char **argv, char *flagStr, unsigned long *bvPtr); +unsigned short set_ulong_flag(int argc, char **argv, char flagChar, unsigned long *lPtr); +char *get_unswitched_arg(int argc, char **argv, unsigned int argNo); + +#define MATCHBUF 1 +#define ECHO 2 +#define NOCASE 4 +#define NOSHOWBUF 8 +#define INTERACTIVE 16 +#define SILENT 32 +#define VERBOSE 64 +#define HELP 
(128 | 256) + +#define DEF_MATCHLIMIT 4096 +#define DEF_MINLENGTH 3 +#define DEF_MEMGRAN 1024 + +int main(int argc, char **argv) { + match_t *matchBuf = 0; + FILE *in; + size_t matchBufSize = 0, matchBufPos = 0; + size_t bufSize, bufPos; + unsigned long hiPos, loPos, hiDat, loDat, firstMatch = ULONG_MAX, numMatches; + unsigned long flags = 0, matchLimit = DEF_MATCHLIMIT, minLength = DEF_MINLENGTH, memGran = DEF_MEMGRAN, matchMemGran; + int readValue; + unsigned short matched, worked = 1; + char *buf = 0, ch, *filename = 0; + + /* skip the first command argument */ + argv++; if(--argc < 0) { printf("Invoked with erreneous argument data (%d arguments.)\n", argc); return 3; } + /* check for the binary flags */ + set_binary_switch_string(argc, argv, "beinosvh?", &flags); + if(flags & HELP) { + /* "________________________________________________________________________________" */ + fprintf(stderr, "Version %d.%d.\n\n", versionMajor, versionMinor); + fprintf(stderr, "%s Copyright %s Neil Edelman\n", programme, year); + fprintf(stderr, "This program comes with ABSOLUTELY NO WARRANTY.\n"); + fprintf(stderr, "This is free software, and you are welcome to redistribute it\n"); + fprintf(stderr, "under certain conditions; see copying.txt.\n\n"); + printf("Searches through input and outputs sequences that are repeated. 
Because it's\n"); + printf("intended for text files, control characters are ignored.\n\n"); + printf("FINDPAT [filename] [-b] [-e] [-i] [-o] [-v] [-m] [-l] [-g] [-?|h]\n\n"); + printf(" filename Attempt to read input from this file, otherwise uses stdin.\n"); + printf(" -b Keep a buffer to count repeated matches (!o -> b.)\n"); + printf(" -e Echo input.\n"); + printf(" -i Case-insensitive (not implemented.)\n"); + printf(" -n Don't display matches at the end.\n"); + printf(" -o Output matches immediately as they are found.\n"); + printf(" -s Silent mode - plain output with no extra characters.\n"); + printf(" -v Verbose comments while outputting.\n"); + printf(" -g Set memory buffer granularity to the closest power of two\n"); + printf(" lower than bytes (default 1024.)\n"); + printf(" -l Set match limit to matches (default 4096; 0 -> no limit.)\n"); + printf(" -m Set minimum match length to symbols (default 3).\n"); + printf(" -?|h Display this help screen and exit.\n\n"); + printf(" Adding -- will turn off switch .\n"); + return 1; + } + if(!(flags & INTERACTIVE)) { flags |= MATCHBUF; } + if((flags & VERBOSE)) printf("Octal flags: 0%o.\n", (unsigned int)flags); + /* set memory buffer granularity (make it a power of two) and make matchMemGran memGran / sizeof(match_t) */ + set_ulong_flag(argc, argv, 'g', &memGran); + { unsigned int memPower; for(memPower = 0; memGran >>= 1; memPower++); memGran = 1 << memPower; } + if(memGran > 16) matchMemGran = memGran >> 4; else matchMemGran = 1; + if((flags & VERBOSE)) printf("Allocation granularity: %u.\n", (unsigned int)memGran); + /* set the limit on the number of matches */ + set_ulong_flag(argc, argv, 'l', &matchLimit); + if((flags & VERBOSE)) printf("Match limit: %u.\n", (unsigned int)matchLimit); + /* get the minimum match length */ + set_ulong_flag(argc, argv, 'm', &minLength); + if(!minLength) minLength = 1; + if((flags & VERBOSE)) printf("Minimum length: %u.\n", (unsigned int)minLength); + /* get the filename for 
input */ + if((filename = get_unswitched_arg(argc, argv, 0))) { + if(!(in = fopen(filename, "r"))) + { printf("Error opening file \"%s\" for reading.\n", filename); return 2; } + } + else if(!(in = stdin)) + { printf("Error reading from stdin.\n"); return 2; } + if((flags & VERBOSE) && filename) printf("Opened file \"%s\" at %p.\n", filename, (void *)in); + /* explain what's going on so those who unwittingly run the app won't get stuck */ + if(!(flags & SILENT)) printf("Finding repeated substrings; EOF (UNIX Ctrl-D, DOS Ctrl-Z.) to end.\n"); + /* run through the buffer, expanding and filling it with input as it goes */ + for(bufSize = 0, bufPos = 0, numMatches = 0; ; bufPos++) { + /* read in a byte */ + if((readValue = fgetc(in)) == EOF) break; + /* haha, binary mode - I don't see how this could possibly be useful */ + /* if(readValue < 0 || readValue > UCHAR_MAX) continue; */ + /* since this is text mode, ignore control characters */ + if(readValue < ' ' || readValue > '~') continue; + /* assign the inputed int to a char to save on repeated conversions */ + ch = (char)readValue; + if((flags & ECHO)) putchar(ch); + /* make sure the buffer size isn't going to exceed the limits of the data type */ + if(bufPos + 2 >= ULONG_MAX - 1) + { printf("Exceeded maximum data modulus of %u.", (unsigned int)ULONG_MAX - 1); worked = 0; break; } + /* expand the buffer if required */ + if(bufPos + 2 >= bufSize) { + bufSize = ((bufPos + 2) & ~(memGran - 1)) + memGran; + if(!(buf = (char *)realloc((void *)buf, sizeof(char) * bufSize))) + { printf("Ran out of memory allocating %u bytes.\n", (unsigned int)(sizeof(char) * bufSize)); worked = 0; break; } + else if((flags & VERBOSE)) printf("Input buffer sized to %u characters (%u bytes) at %p.\n", (unsigned int)bufSize, (unsigned int)(sizeof(char) * bufSize), buf); + } + /* write the symbol to the buffer with a null after it for string output */ + buf[bufPos] = (char)ch; + buf[bufPos + 1] = 0; + /* starting at or past half way through the 
buffer and work towards the + end; yes, the + 1 is right not + (bufPos & 1); think zero-offset */ + for(hiPos = (bufPos >> 1) + 1; hiPos + minLength - 1 <= bufPos; hiPos++) { + /* initial flags no match */ + matched = 0; + /* search forwards from the buffer start up to the last point where + a full match might exist with the hi sequence */ + for(loPos = 0; loPos < hiPos - (bufPos - hiPos); loPos++) { + /* search through the entire hi and lo sequences for a match */ + for(hiDat = hiPos, loDat = loPos; ; hiDat++, loDat++) { + /* break on a non-match FIXME: add a case-insensitive version */ + if(buf[hiDat] != buf[loDat]) break; + /* if at the end, the whole thing matched */ + if(hiDat >= bufPos) { matched = 1; break; } + } + /* check if the above resulted in a full match */ + if(matched) { + match_t match; + + /* check if this match is the same as the previous match */ + /* don't exceed the limit */ + if(matchLimit && numMatches++ >= matchLimit) + { printf("Match limit of %u exceeded.\n", (unsigned int)matchLimit); worked = 0; break; } + /* fill in info about the match */ + match.pos = loPos; + match.len = 1 + bufPos - hiPos; + if((flags & INTERACTIVE)) printf("<%-*.*s", (int)match.len, (int)match.len, buf + match.pos); + /* if there's a match buffer, put the match into it */ + if((flags & MATCHBUF)) { + unsigned long node, prevNode; + + /* skip past all matches up to this position */ + for(matched = 0, node = firstMatch, prevNode = ULONG_MAX; node != ULONG_MAX; prevNode = node, node = matchBuf[node].next) { + /* skip past all earlier positions */ + if(matchBuf[node].pos < match.pos) continue; + /* no match if the next position is later */ + if(matchBuf[node].pos > match.pos) break; + /* otherwise, this position must have matched */ + matched = 1; break; + } + if(matched) { + /* skip past all matches up to this length FIXME: 1 unneccessary test for if(node) and unneccesary assignment of prevNode */ + for(matched = 0; node != ULONG_MAX; prevNode = node, node = 
matchBuf[node].next) { + /* skip past all smaller lengths */ + if(matchBuf[node].len < match.len) continue; + /* no match if the next length is larger */ + if(matchBuf[node].len > match.len) break; + /* otherwise, this length must have matched */ + matched = 1; break; + } + } + /* if the length and position of both matches are the same, increment the existing match count */ + if(matched) matchBuf[node].count++; + /* otherwise, a new match must be created */ + else { + match_t *newMatch; + + /* make sure the buffer size isn't going to exceed the limits of the data type */ + if(matchBufPos + 1 >= ULONG_MAX - 1) /* need the -1 because ULONG_MAX is a NULL flag */ + { printf("Match buffer exceeded maximum data modulus of %u.", (unsigned int)ULONG_MAX - 1); worked = 0; break; } + /* expand the buffer if required */ + if(matchBufPos + 1 >= matchBufSize) { + matchBufSize = ((matchBufPos + 1) & ~(matchMemGran - 1)) + matchMemGran; + if(!(matchBuf = (match_t *)realloc((void *)matchBuf, sizeof(match_t) * matchBufSize))) + { printf("Ran out of memory allocating %u bytes.\n", (unsigned int)(sizeof(match_t) * matchBufSize)); worked = 0; break; } + else if((flags & VERBOSE)) printf("Match buffer sized to %u matches (%u bytes) at %p.\n", (unsigned int)matchBufSize, (unsigned int)(sizeof(match_t) * matchBufSize), (void *)matchBuf); + } + /* get the address of the new match and move ahead for next time */ + newMatch = matchBuf + matchBufPos++; + /* copy the match info into the new node */ + newMatch->pos = match.pos; newMatch->len = match.len; newMatch->count = 1; + /* insert into the linked list */ + if(prevNode != ULONG_MAX) { newMatch->next = matchBuf[prevNode].next; matchBuf[prevNode].next = matchBufPos - 1; } + else { newMatch->next = firstMatch; firstMatch = matchBufPos - 1; } + } + if((flags & INTERACTIVE)) printf("|#%u>\n", (unsigned int)matchBuf[node].count); + } + /* if there is no match buffer */ + else if((flags & INTERACTIVE)) printf(">\n"); + /* all other matches will 
have already been listed from + previous searches so stop looking */ + break; + } + /* make sure everything is still working */ + if(!worked) break; + } + /* make sure everything is still working */ + if(!worked) break; + } + /* make sure everything is still working */ + if(!worked) break; + } + /* if the buffer exists */ + if(matchBuf) { + unsigned long node; + + /* FIXME: filter out substrings of matches that match the same as the whole string */ + for(node = firstMatch; node != ULONG_MAX; node = matchBuf[node].next) { + } + /* print out the results */ + if(!(flags & NOSHOWBUF)) { + if((flags & SILENT)) + { for(node = firstMatch; node != ULONG_MAX; node = matchBuf[node].next) printf("%-*.*s\n", (int)matchBuf[node].len, (int)matchBuf[node].len, buf + matchBuf[node].pos); } + else + { for(node = firstMatch; node != ULONG_MAX; node = matchBuf[node].next) printf("%u*<%-*.*s>@%u\n", (unsigned int)matchBuf[node].count, (int)matchBuf[node].len, (int)matchBuf[node].len, buf + matchBuf[node].pos, (unsigned int)matchBuf[node].pos); } + } + /* free the buffer */ + if((flags & VERBOSE)) printf("Freeing match buffer at %p.\n", (void *)matchBuf); + free(matchBuf); + } + /* free the memory allocated to the buffer */ + if(buf) { if((flags & VERBOSE)) printf("Freeing input buffer at %p.\n", buf); free(buf); } + /* close the file */ + if(filename) { if((flags & VERBOSE)) printf("Closing input file at %p.\n", (void *)in); fclose(in); } + /* return the error state */ + if(worked) return 0; else return 1; +} + +void set_binary_switch_string(int argc, char **argv, char *flagStr, unsigned long *bvPtr) { + unsigned long mask; + int i; + char **arg, *str, *flagPos; + + /* go through all the argv's */ + for(i = argc, arg = argv; i > 0; i--, arg++) { + /* this argument looks like -a... or /a... 
*/ + if((*(str = *arg) == '-' || *str == '/') && *(++str)) { + /* looks like /a -a /a+ or -a+ */ + if(!str[1] || str[1] == '+') { + for(mask = 1, flagPos = flagStr; *flagPos && mask; flagPos++, mask <<= 1) + { if(*str == *flagPos) { *bvPtr |= mask; break; } } + } + /* looks like /a- or -a- */ + else if(str[1] == '-') { + for(mask = 1, flagPos = flagStr; *flagPos && mask; flagPos++, mask <<= 1) + { if(*str == *flagPos) { *bvPtr &= ~mask; break; } } + } + } + } +} + +unsigned short set_ulong_flag(int argc, char **argv, char flagChar, unsigned long *lPtr) { + unsigned long num; + int i; + unsigned short gotNum = 0; + char **arg, *str, *end; + + /* go through all the argv's */ + for(i = argc, arg = argv; i > 0; i--, arg++) { + str = *arg; + /* looks like -fa... or /fa... */ + if((*str == '-' || *str == '/') && *(++str) == flagChar && *(++str)) + { num = strtoul(str, &end, 0); if(!*end) { *lPtr = num; gotNum = 1; } } + } + return gotNum; +} + +char *get_unswitched_arg(int argc, char **argv, unsigned int argNo) { + int i; + char **arg, *str; + + /* go through all the argv's */ + for(i = argc, arg = argv; i > 0; i--, arg++) { + /* this argument exists, doesn't look like -... or /..., and is no. argNo to be so */ + if(*(str = *arg) && *str != '/' && *str != '-' && !argNo--) return str; + } + return 0; +} diff --git a/gpl.txt b/gpl.txt new file mode 100644 index 0000000..bc08fe2 --- /dev/null +++ b/gpl.txt @@ -0,0 +1,619 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
diff --git a/killspam/death.ico b/killspam/death.ico new file mode 100644 index 0000000..0d424af Binary files /dev/null and b/killspam/death.ico differ diff --git a/killspam/deathico.h b/killspam/deathico.h new file mode 100644 index 0000000..93afd56 --- /dev/null +++ b/killspam/deathico.h @@ -0,0 +1 @@ +#define IDI_DEATH 128 diff --git a/killspam/deathico.rc b/killspam/deathico.rc new file mode 100644 index 0000000..2051e7b --- /dev/null +++ b/killspam/deathico.rc @@ -0,0 +1,3 @@ +#include "deathico.h" + +IDI_DEATH ICON "death.ico" diff --git a/killspam/killspam.c b/killspam/killspam.c new file mode 100644 index 0000000..a58de68 --- /dev/null +++ b/killspam/killspam.c @@ -0,0 +1,445 @@ +/* Copyright 2006 Neil Edelman, distributed under the terms of the + GNU General Public License, see copying.txt */ + +/* this is a LCC-WIN32 mail programme that goes with Find Patterns */ + +#include <stdlib.h> /* atoi(), getenv(), malloc(), free() */ +#include <string.h> /* memcpy(), strchr() */ +#include <stdio.h> /* printf() */ +#include <conio.h> /* getch() */ +#include <regexp.h> /* I'm lazy so I'll use LCC-WIN32's regexps */ +#include "net.h" + +#define MAX_REGX_SIZE 0x4000 +#define BUF_SIZE 256 +#define LOG_FILE_NAME "email" +#define SPAM_FILE_NAME "spamregx" +#define SPAM_ENV_VAR "SPAMREGX" + +static const char *programme = "Vindicator"; +static const char *year = "2006"; +static const int versionMajor = 0; +static const int versionMinor = 1; + +typedef struct tagMailSession { + enum { + WAIT_GREET, + WAIT_USEROK, + WAIT_PASSOK, + WAIT_STAT, + CHECKA_EMAIL, + WAIT_RETROK, + PARSE_DATA, + WAIT_DELEOK, + RUN_AWAY + } state; + int mailPos; + int mailCount; + int spamFactor; + unsigned int lnSize; + char line[BUF_SIZE]; + unsigned int rpSize; + char resp[BUF_SIZE]; + FILE *log; + regexp *subjRe; + regexp *fromRe; + regexp *spamRe; +} MailSession; + +static MailSession *create_mail_session(void); +static void destroy_mail_session(MailSession *ms); +static int check_email(MailSession *ms, Net *net, char *name, unsigned int port);
+static int get_response(MailSession *ms); +static int getstr(char **buf, int size, int echo); + +int main(int argc, char **argv) { + Net *net; + MailSession *ms; + int ret, port; + char *server; + + printf("%s\n\n", programme); + if(argc <= 1) { + printf("Arguments: [<server> [<port>]]\n\n"); + printf("Connects to <server> (default localhost) on port <port> (default 110)\n"); + printf("retreives emails, logging them into \"%s\" if it exists. If the\n", LOG_FILE_NAME); + printf("environment variable \"%s\" is defined or the file \"%s\"\n", SPAM_ENV_VAR, SPAM_FILE_NAME); + printf("exists, it loads the a regexp from them. Any emails matching the regexp\n"); + printf("can then be annihilated.\n\n"); + fprintf(stderr, "Version %d.%d.\n\n", versionMajor, versionMinor); + fprintf(stderr, "%s Copyright %s Neil Edelman\n", programme, year); + fprintf(stderr, "This program comes with ABSOLUTELY NO WARRANTY.\n"); + fprintf(stderr, "This is free software, and you are welcome to redistribute it\n"); + fprintf(stderr, "under certain conditions; see copying.txt.\n\n"); + return 1; + } + port = (argc >= 3) ? atoi(argv[2]) : 110; + server = (argc >= 2) ? argv[1] : "localhost"; + if(!(net = create_net(1024))) { + printf("Failed creating connection data.\n"); + return 1; + } + if(!(ms = create_mail_session())) { + printf("Failed creating e-mail session data.\n"); + destroy_net(net); + return 1; + } + ret = check_email(ms, net, server, port); + destroy_mail_session(ms); + destroy_net(net); + return ret ?
0 : 1; +} + +static MailSession *create_mail_session(void) { + MailSession *ms; + + /* create and initialize the memory space */ + if(!(ms = malloc(sizeof(MailSession)))) { + perror("Error allocating memory for e-mail data.\n"); + return 0; + } + printf("Created mail session data.\n"); + ms->state = WAIT_GREET; + ms->mailPos = 1; + ms->mailCount = 0; + /* open the log file */ + if(!(ms->log = fopen(LOG_FILE_NAME, "r"))) { + perror("Didn't find log file"); + printf("To log email, create a file nammed \"%s.\"\n", LOG_FILE_NAME); + } else { + fclose(ms->log); + if(!(ms->log = fopen(LOG_FILE_NAME, "w"))) + perror("Failed opening log file"); + else + printf("Opened log file, \"%s,\" for writing.\n", LOG_FILE_NAME); + } + if(!(ms->subjRe = regcomp("^[Ss]ubject: "))) + printf("Error compiling subject regexp.\n"); + if(!(ms->fromRe = regcomp("^[Ff]rom: "))) + printf("Error compiling sender regexp.\n"); + ms->spamRe = 0; + /* read in the spam regexp */ + { + FILE *fp = 0; + int len; + char regStr[MAX_REGX_SIZE] /* YIKES! 
*/, *regPtr; + + if(!(regPtr = getenv(SPAM_ENV_VAR)) + && !(fp = fopen(SPAM_FILE_NAME, "r"))) { + perror("Didn't find spam regexp file"); + printf("To load a spam regexp, write it in a file nammed \"%s\" or set it\n", SPAM_FILE_NAME); + printf("as an environment variable, \"%s.\" Please enter the regexp with which to\n", SPAM_ENV_VAR); + printf("smite the vile spam:\n"); + regPtr = regStr; + if((len = getstr(&regPtr, sizeof(regStr), 1)) < 0) { + destroy_mail_session(ms); + return 0; + } + putchar('\n'); + } else if(fp) { + int ch; + + printf("Reading regexp from file \"%s.\"\n", SPAM_FILE_NAME); + len = 0; + regPtr = regStr; + for( ; ; ) { + if((ch = fgetc(fp)) == EOF) break; + if(ch && ch != '\n' && ch != '\r') { + *(regPtr++) = ch; + if(len++ >= sizeof(regStr)) { + printf("Regexp size exceeded limit of %u bytes.\n", sizeof(regStr)); + len = 0; + break; + } + } + } + fclose(fp); + *regPtr = 0; + } else { + if((len = strlen(regPtr)) >= sizeof(regStr)) { + printf("Regexp size exceeded limit of %u bytes.\n", sizeof(regStr)); + len = 0; + } else if(len >= 2 + && ((regPtr[0] == '\'' && regPtr[len - 1] == '\'') + || (regPtr[0] == '\"' && regPtr[len - 1] == '\"'))) { + memcpy(regStr, regPtr + 1, len - 2); + regStr[len - 2] = 0; + } else { + memcpy(regStr, regPtr, len); + regStr[len] = 0; + } + } + if(len > 0) { + if(!(ms->spamRe = regcomp(regStr))) + printf("Error compiling spam regexp.\n"); + else + printf("Spam regexp is %s\n", regStr); + } + } + return ms; +} + +static void destroy_mail_session(MailSession *ms) { + /* Email *eml, *nextEml; */ + + if(!ms) return; + if(ms->log) { + printf("Closing log file.\n"); + fclose(ms->log); ms->log = 0; + } + if(ms->subjRe) { free(ms->subjRe); ms->subjRe = 0; } + if(ms->fromRe) { free(ms->fromRe); ms->fromRe = 0; } + if(ms->spamRe) { free(ms->spamRe); ms->spamRe = 0; } + /* for(eml = ms->emailList; eml; eml = nextEml) { + nextEml = eml->next; + free(eml); + } */ + free(ms); +} + +static int check_email(MailSession *ms, Net *net,
char *name, unsigned int port) { + unsigned int lnRem, lnEol, svRem, svEol; + char *lnPtr, *svNl, *svPtr; + + /* connect to the given host */ + if(!set_net_host(net, name, port) + || !connect_net(net)) return 0; + /* loop reading lines from the server */ + lnPtr = ms->line; + lnRem = sizeof(ms->line) - 1; + svPtr = 0; + svRem = 0; + for( ; ; ) { + /* if the server string is empty, check for more */ + if(!svRem && !read_net_string(net, &svPtr, &svRem, 100000)) return 0; + /* how many chars until the end of the line */ + svEol = (svNl = strchr(svPtr, '\n')) ? svNl - svPtr + 1 : svRem; + lnEol = (svEol > lnRem) ? lnRem : svEol; + /* copy over a whole line from the server string into the local one */ + memcpy(lnPtr, svPtr, lnEol); + lnPtr[lnEol] = 0; + /*printf("svPtr = %s; svRem = %u; lnRem = %u; svEol = %u; lnEol = %u; lnPtr = %s\n", svPtr, svRem, lnRem, svEol, lnEol, lnPtr);*/ + /* shift all positions by this amount */ + svPtr += lnEol; + svRem -= lnEol; + svEol -= lnEol; + lnPtr += lnEol; + lnRem -= lnEol; + /* check if there was a new line or the local buffer is full */ + if(svNl || !lnRem) { + ms->lnSize = sizeof(ms->line) - lnRem - 1; + ms->rpSize = sizeof(ms->resp); + if(!get_response(ms)) break; + if(ms->rpSize && !send_net_string(net, ms->resp, ms->rpSize)) return 0; + lnPtr = ms->line; + lnRem = sizeof(ms->line) - 1; + } + } + return 1; +} + + +static int get_response(MailSession *ms) { + char *rpPtr; + + if(ms->state != PARSE_DATA) { + printf("Mr. Mail Deamon: %s", ms->line); + if(!ms->lnSize || ms->line[ms->lnSize - 1] != '\n') putchar('\n'); + } + switch(ms->state) { + case WAIT_GREET: + if(*ms->line != '+') { + printf("If this is a POP3 mail server, it seems to be having an identity crisis.\n"); + return 0; + } + printf("The server is glad to see you. 
It wants your user name: "); + rpPtr = ms->resp + sprintf(ms->resp, "user "); + if(getstr(&rpPtr, ms->rpSize + ms->resp - rpPtr, 1) < 0) return 0; + printf("\n"); rpPtr += sprintf(rpPtr, "\n"); + ms->rpSize = rpPtr - ms->resp; + printf("%s: %s", programme, ms->resp); + ms->state = WAIT_USEROK; + return 1; + case WAIT_USEROK: + if(*ms->line == '-') break; + if(*ms->line != '+') return 1; + printf("That user name seems acceptable. Now for a password: "); + rpPtr = ms->resp + sprintf(ms->resp, "pass "); + if(getstr(&rpPtr, ms->rpSize + ms->resp - rpPtr, 0) < 0) return 0; + printf("\n"); rpPtr += sprintf(rpPtr, "\n"); + ms->rpSize = rpPtr - ms->resp; + printf("%s: pass \n", programme/*, ms->resp*/); + ms->state = WAIT_PASSOK; + return 1; + case WAIT_PASSOK: + if(*ms->line == '-') break; + if(*ms->line != '+') return 1; + printf("The server has accepted you! It thinks you are Special.\n"); + sprintf(ms->resp, "stat\n"); ms->rpSize = 5; + printf("%s: %s", programme, ms->resp); + ms->state = WAIT_STAT; + return 1; + case WAIT_STAT: + if(*ms->line == '-') break; + if(*ms->line != '+') return 1; + { + char *numStart; + + rpPtr = ms->line; + while(*rpPtr && *rpPtr != ' ') rpPtr++; + while(*rpPtr == ' ') rpPtr++; + if(!*(numStart = rpPtr)) { printf("That doesn't make any sense!\n"); break; } + while(*rpPtr && *rpPtr != ' ') rpPtr++; + *rpPtr = 0; + ms->mailCount = atoi(numStart); + } + if(ms->mailPos <= ms->mailCount) { + rpPtr = ms->resp + sprintf(ms->resp, "retr %u\n", ms->mailPos); + ms->rpSize = rpPtr - ms->resp; + ms->state = WAIT_RETROK; + } else { + sprintf(ms->resp, "quit\n"); ms->rpSize = 5; + ms->state = RUN_AWAY; + } + printf("%s: %s", programme, ms->resp); + return 1; + case WAIT_RETROK: + if(*ms->line == '-') break; + if(*ms->line != '+') return 1; + *ms->resp = 0; + ms->rpSize = 0; + /* keep it simple . . . (who wants confirmation later anyway?) 
+ { + Email *eml; + + if(!(eml = malloc(sizeof(Email)))) + perror("Failed allocating memory to store e-mail info."); + else { + eml->next = ms->emailList; + eml->subject[0] = 0; + eml->sender[0] = 0; + eml->number = ms->mailPos; + eml->spamFactor = 0; + ms->emailList = eml; + } + } */ + ms->spamFactor = 0; + /* printf("[ Scouring message %u for unholy spam . . . ]\n", ms->mailPos); */ + ms->state = PARSE_DATA; + return 1; + case PARSE_DATA: + if(ms->line[0] == '.' && (ms->line[1] == '\r' || ms->line[1] == '\n')) { + if(ms->log) fprintf(ms->log, ".\n[ End of Message %u. ]\n", ms->mailPos); + if(ms->spamFactor) { + rpPtr = ms->resp + sprintf(ms->resp, "dele %u\n", ms->mailPos); + ms->rpSize = rpPtr - ms->resp; + ms->state = WAIT_DELEOK; + } else if(++ms->mailPos <= ms->mailCount) { + rpPtr = ms->resp + sprintf(ms->resp, "retr %u\n", ms->mailPos); + ms->rpSize = rpPtr - ms->resp; + ms->state = WAIT_RETROK; + } else { + sprintf(ms->resp, "quit\n"); ms->rpSize = 5; + ms->state = RUN_AWAY; + } + printf("%s: %s", programme, ms->resp); + } else { + if(ms->log) fprintf(ms->log, "%s", ms->line); + /* if(ms->emailList + && ms->emailList->number == ms->mailPos) { + if(!ms->emailList->subject[0] + && ms->subjRe && regexec(ms->subjRe, ms->line)) { + strncpy(ms->emailList->subject, ms->line, sizeof(ms->emailList->subject)); + printf("%s", ms->line); + } else if(!ms->emailList->sender[0] + && ms->fromRe && regexec(ms->fromRe, ms->line)) { + strncpy(ms->emailList->subject, ms->line, sizeof(ms->emailList->subject)); + printf("%s", ms->line); + } + if(!ms->emailList->spamFactor + && ms->spamRe && regexec(ms->spamRe, ms->line)) { + ms->emailList->spamFactor++; + sm->spamFactor++; + printf("EVIL SPAM!->%s", ms->line); + } + } */ + if(ms->subjRe && regexec(ms->subjRe, ms->line)) + printf("%s", ms->line); + else if(ms->fromRe && regexec(ms->fromRe, ms->line)) + printf("%s", ms->line); + if(ms->spamRe && regexec(ms->spamRe, ms->line)) { + ms->spamFactor++; + printf("---EVIL SPAM!--->%s", 
ms->line); + } + *ms->resp = 0; + ms->rpSize = 0; + } + return 1; + case WAIT_DELEOK: + if(*ms->line == '-') break; + if(*ms->line != '+') return 1; + if(++ms->mailPos <= ms->mailCount) { + rpPtr = ms->resp + sprintf(ms->resp, "retr %u\n", ms->mailPos); + ms->rpSize = rpPtr - ms->resp; + ms->state = WAIT_RETROK; + } else { + sprintf(ms->resp, "quit\n"); ms->rpSize = 5; + ms->state = RUN_AWAY; + } + printf("%s: %s", programme, ms->resp); + return 1; + case RUN_AWAY: + if(*ms->line != '+') printf("The server is very confused. "); + printf("Bye server.\n"); + return 0; + } + printf("The server is confused!\n"); + sprintf(ms->resp, "quit\n"); ms->rpSize = 5; + ms->state = RUN_AWAY; + return 1; +} + +static int getstr(char **buf, int size, int echo) { + int ch; + int pos = 0; + char *str = *buf; + + if(size < 1) return -1; + *str = 0; + for( ; ; ) { + ch = getch(); + if(ch <= 0) return -1; + if((ch & 0x60) == 0x00) { + switch((ch & 0x1F) | 0x40) { + case '@': /* ^@ - null */ + case 'C': /* ^C - end of text */ + case 'D': /* ^D - end of transmission */ + case '[': /* ^[ - escape */ + *buf = str; + return -1; + case 'U': + if(echo) { while(pos--) printf("\b \b"); } + pos = 0; *(str = *buf) = 0; + break; + case 'H': + if(pos) { + pos--; *(--str) = 0; + if(echo) printf("\b \b"); + } + break; + case 'J': /* ^J - LF (NL) new line */ + case 'M': /* ^M - CR return */ + *buf = str; + return pos; + } + } else if(ch == 0x7F) { + } else { + if(pos + 1 < size) { + if(echo) putchar(ch); + str[0] = ch; + str[1] = 0; + if(pos + 2 < size) { pos++; str++; } + } + } + } +} diff --git a/killspam/makefile b/killspam/makefile new file mode 100644 index 0000000..1fb2a1c --- /dev/null +++ b/killspam/makefile @@ -0,0 +1,38 @@ +out := killspam.exe +obj := net.o killspam.o +res := deathico.res +lib := ws2_32.lib regexp.lib + +# cc = gcc -c -fasm -pedantic -Wall -O3\ +# -fomit-frame-pointer -ffast-math -funroll-loops\ +# $*.c -o $*.o +# ld = gcc -s -lws2_32 -O3 -o $(out) $(obj) $(res) $(lib) +# rc 
= windres $*.rc -o $*.res + +cc = lcc -A -ansic -e8 -O -Fo$*.o $*.c +ld = lcclnk -s -o $(out) $(obj) $(res) $(lib) +rc = lrc -Fo$*.res $*.rc + +default : $(out) + +.PHONY : clean +clean : + $(foreach file, $(obj) $(out), -$(RM) $(file)$(br)) + +$(out) : $(obj) $(res) + $(ld) + +net.o : net.c + $(cc) + +killspam.o : killspam.c + $(cc) + +deathico.res : deathico.rc + $(rc) + +%.o : %.c + $(cc) + +%.res : %.rc + $(rc) diff --git a/killspam/net.c b/killspam/net.c new file mode 100644 index 0000000..0a351d5 --- /dev/null +++ b/killspam/net.c @@ -0,0 +1,227 @@ +/* Copyright 2006 Neil Edelman, distributed under the terms of the + GNU General Public License, see copying.txt */ + +/* this is a socket programme for Windows that goes with the KillSpam */ + +#include +#include /* malloc(), free() */ +#include /* printf(), fprintf(), perror() */ +#include /* strncmp() */ +#include "net.h" + +#define WSA_VERSION 2 + +struct tagNet { + enum { + WSA_DLL = 1, + FOUND_HOST = 2, + CONNECTED = 4 + } flags; + int sd; + unsigned long nlAddr; + unsigned short nsPort; + unsigned int bufSize; + char *buffer; +}; + +Net *create_net(unsigned int bufSize) { + WSADATA wsaData; + Net *net; + int ret; + + /* require a buffer with at least a single character in it plus a null */ + if(bufSize < 2) return 0; + /* allocate and initialize the new structure */ + if(!(net = malloc(sizeof(Net) + bufSize))) return 0; + net->flags = 0; + net->sd = INVALID_SOCKET; + net->nlAddr = INADDR_NONE; + net->bufSize = bufSize; + net->buffer = (char *)(net + 1); + /* load Winsock 2.0 DLL */ + if((ret = WSAStartup(WSA_VERSION, &wsaData)) != 0) { + printf("Error initializing windows socket library (%d.)\n", ret); + destroy_net(net); + return 0; + } else { + printf("Windows socket library loaded.\n"); + net->flags |= WSA_DLL; + } + return net; +} + +void destroy_net(Net *net) { + if(!net) return; + if(net->flags & CONNECTED) disconnect_net(net); + if(net->flags & WSA_DLL) { + printf("Releasing Windows sockets 
library.\n"); + /* unload Winsock DLL */ + if(WSACleanup() != 0) { + printf("Failed shutting down socket library (%d.)\n", WSAGetLastError()); + } + } + free(net); +} + +int set_net_host(Net *net, char *hostName, unsigned short port) { + unsigned long nlAddr; + + if(!net || !net->flags & WSA_DLL || !hostName) return 0; + /* first check if this is a valid dotted IP address */ + if((nlAddr = inet_addr(hostName)) == INADDR_NONE) { + /* make sure it wasn't specifically INADDR_NONE */ + if(strncmp(hostName, "255.255.255.255", 15) != 0) { + struct hostent *ent; + + printf("Looking up %s.\n", hostName); + /* lastly try to look up the host address with DNS */ + if(!(ent = gethostbyname(hostName))) { + /* herror("Looking up host address"); <- not in WSA */ + printf("Couldn't find address %s (%d.)\n", hostName, WSAGetLastError()); + } else { + struct in_addr inadr; + + nlAddr = inadr.s_addr = *((unsigned long *)ent->h_addr_list[0]); + printf("Found I.P. %s.\n", inet_ntoa(inadr)); + } + } + } + if(nlAddr == INADDR_NONE) return 0; + net->nlAddr = nlAddr; + /* set the port using network byte order */ + net->nsPort = htons(port); + net->flags |= FOUND_HOST; + return 1; +} + +int connect_net(Net *net) { + struct sockaddr_in inaddr; + + if(!net + || !(net->flags & FOUND_HOST) + || (net->flags & CONNECTED)) return 0; + /* create the socket */ + if((net->sd = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET) { + perror("Unable to create socket"); + /* printf("Unable to create socket (%u.)\n", WSAGetLastError()); */ + return 0; + } else { + printf("Opened socket.\n"); + } + /* connect to the host */ + memset(&inaddr, 0, sizeof(inaddr)); + inaddr.sin_family = AF_INET; + inaddr.sin_port = net->nsPort; + inaddr.sin_addr.s_addr = net->nlAddr; + if(connect(net->sd, (const struct sockaddr *)&inaddr, sizeof(inaddr)) != 0) { + perror("Failed connecting to host"); + /* printf("Failed connecting to host (%u.)\n", WSAGetLastError()); */ + disconnect_net(net); + return 0; + } + else { + 
net->flags |= CONNECTED; + printf("Connected on port %u.\n", ntohs(inaddr.sin_port)); + } + return 1; +} + +void disconnect_net(Net *net) { + if(!net) return; + if((net->flags & CONNECTED)) { + int len; + + printf("Disconnecting.\n"); + /* initiate sending shutdown */ + if(shutdown(net->sd, SD_SEND) != 0) { + perror("Error shutting down outgoing connection"); + /* printf("Error shutting down outgoing connection (%u.)\n", WSAGetLastError()); */ + } + /* loop on recv until all data is received */ + while((len = recv(net->sd, net->buffer, net->bufSize, 0))) { + if(len == SOCKET_ERROR) { + perror("Error receiving left-over data"); + /* printf("Error receiving left-over data (%u.)\n", WSAGetLastError()); */ + break; + } + } + /* shut down reading on this connection */ + if(shutdown(net->sd, SD_RECEIVE) != 0) { + perror("Error shutting down incoming connection"); + /*printf("Error shutting down incoming connection (%u.)\n", WSAGetLastError()); */ + } + net->flags &= ~CONNECTED; + } + if(net->sd != INVALID_SOCKET) { + printf("Closing socket.\n"); + /* release the socket descriptor */ + if(closesocket(net->sd) != 0) { + perror("Unable to close socket"); + /* printf("Unable to close socket (%u.)\n", WSAGetLastError()); */ + } else { + net->sd = INVALID_SOCKET; + } + } +} + +int send_net_string(Net *net, char *str, int len) { + int ret; + + if(!net || !(net->flags & CONNECTED)) return 0; + for( ; ; ) { + if((ret = send(net->sd, str, len, 0)) == SOCKET_ERROR) { + perror("Error sending data"); + /* printf("Error sending data (%u.)\n", WSAGetLastError()); */ + return 0; + } + str += ret; + if((len -= ret) <= 0) break; + } + return 1; +} + +int read_net_string(Net *net, char **bufAddr, unsigned int *lenAddr, long usec) { + struct timeval tv; + struct fd_set readfds; + int len; + + if(!net + || !(net->flags & CONNECTED) + || usec < 0) return 0; + tv.tv_sec = 0; + tv.tv_usec = usec; + /* initialize to an empty buffer */ + *net->buffer = 0; + *bufAddr = net->buffer; + *lenAddr = 0; 
+ /* since this is Windows, it doesn't quite work like it should */ + /* fdStdin = fileno(stdin); */ + FD_ZERO(&readfds); + /* FD_SET(fdStdin, &readfds); */ + FD_SET(net->sd, &readfds); + /* fdMax = (net->sd > fdStdin) ? net->sd : fdStdin; */ + if((len = select(net->sd, &readfds, 0, 0, &tv)) == SOCKET_ERROR) { + perror("Error checking input status"); + return 0; + } + /* ioctlsocket(net->sd, FIONREAD, &amount); + if(!amount) return 0; */ + if(FD_ISSET(net->sd, &readfds)) { + if((len = recv(net->sd, net->buffer, net->bufSize - 1, 0)) == SOCKET_ERROR) { + perror("Error receiving data"); + /* printf("Error receiving data (%u.)\n", WSAGetLastError()); */ + return 0; + } else if(!len) { + printf("Connection terminated by host.\n"); + /* successfully reading nothing indicates disconnection */ + net->flags &= ~CONNECTED; + return 0; + } + /* this will never happen */ + if(len > net->bufSize - 1) len = net->bufSize - 1; + /* terminate the buffer with a null character */ + net->buffer[len] = 0; + *lenAddr = len; + } + return 1; +} diff --git a/killspam/net.h b/killspam/net.h new file mode 100644 index 0000000..5f8a23b --- /dev/null +++ b/killspam/net.h @@ -0,0 +1,9 @@ +typedef struct tagNet Net; + +Net *create_net(unsigned int bufSize); +void destroy_net(Net *net); +int set_net_host(Net *net, char *hostName, unsigned short port); +int connect_net(Net *net); +void disconnect_net(Net *net); +int send_net_string(Net *net, char *str, int len); +int read_net_string(Net *net, char **bufAddr, unsigned int *lenAddr, long usec); diff --git a/killspam/spamregx b/killspam/spamregx new file mode 100644 index 0000000..dc117c0 --- /dev/null +++ b/killspam/spamregx @@ -0,0 +1,21 @@ +^(To|C[Cc]): .*@(hotmail|yahoo).com| +^Subject: .*([Mm]ortgage|[Dd]ebt|[Mm]oney|[Aa]pprov(ed|al)|[Ee]-?[Bb]ay| +[Bb]ulk e-?mail|EMAIL|[Vv][Ii1][Aa][Gg][Rr][Aa]| Meds| | +[Pp]rescr[1i]pt[1i]on|[0-9]+% [Oo]ff|!$)| +^X-Mailer: The Bat! 
.* Business| +Received: from [a-z\.]*\.(jp|ro|net|ru|ar|uk) \(unknown| +.+.+.+| [-m<n>] [-l<n>] [-g<n>] [-?|h] + + filename Attempt to read input from this file, otherwise uses stdin. + -b Keep a buffer to count repeated matches (!o -> b.) + -e Echo input. + -i Case-insensitive (not implemented.) + -n Don't display matches at the end. + -o Output matches immediately as they are found. + -s Silent mode - plain output with no extra characters. + -v Verbose comments while outputting. + -g<n> Set memory buffer granularity to the closest power of two + lower than <n> bytes (default 1024.) + -l<n> Set match limit to <n> matches (default 4096; 0 -> no limit.) + -m<n> Set minimum match length to <n> symbols (default 3). + -?|h Display this help screen and exit. + + Adding -- will turn off switch . diff --git a/spamregx b/spamregx new file mode 100644 index 0000000..dc117c0 --- /dev/null +++ b/spamregx @@ -0,0 +1,21 @@ +^(To|C[Cc]): .*@(hotmail|yahoo).com| +^Subject: .*([Mm]ortgage|[Dd]ebt|[Mm]oney|[Aa]pprov(ed|al)|[Ee]-?[Bb]ay| +[Bb]ulk e-?mail|EMAIL|[Vv][Ii1][Aa][Gg][Rr][Aa]| Meds| | +[Pp]rescr[1i]pt[1i]on|[0-9]+% [Oo]ff|!$)| +^X-Mailer: The Bat! .* Business| +Received: from [a-z\.]*\.(jp|ro|net|ru|ar|uk) \(unknown| +.+.+.+|