import pyzor 0.4.0

Pyzor is a collaborative, networked system to detect and block spam
using identifying digests of messages.

Pyzor initially started out to be merely a Python implementation of
Razor, but due to the protocol and the fact that Razor's server is not
Free Software, I decided to Pyzor should be based upon a new, lighter,
more language- independent protocol.

requested by and ok dlg@
This commit is contained in:
jasper 2008-09-02 12:33:17 +00:00
parent 589d72e28f
commit 129eecc4f6
7 changed files with 548 additions and 0 deletions

28
mail/pyzor/Makefile Normal file
View File

@ -0,0 +1,28 @@
# $OpenBSD: Makefile,v 1.1.1.1 2008/09/02 12:33:17 jasper Exp $
COMMENT= collaborative, networked system to detect and block spam
DISTNAME= pyzor-0.4.0
CATEGORIES= mail net
HOMEPAGE= http://pyzor.sourceforge.net/
# GPLv2
PERMIT_PACKAGE_CDROM= Yes
PERMIT_PACKAGE_FTP= Yes
PERMIT_DISTFILES_CDROM= Yes
PERMIT_DISTFILES_FTP= Yes
MASTER_SITES= ${MASTER_SITE_SOURCEFORGE:=pyzor/}
EXTRACT_SUFX= .tar.bz2
MODULES= lang/python
NO_REGRESS= Yes
SUBST_VARS= MODPY_BIN
pre-configure:
${SUBST_CMD} ${WRKSRC}/scripts/pyzor ${WRKSRC}/scripts/pyzord
.include <bsd.port.mk>

5
mail/pyzor/distinfo Normal file
View File

@ -0,0 +1,5 @@
MD5 (pyzor-0.4.0.tar.bz2) = 7nr+TMnUGbzl8pJQoBxDdA==
RMD160 (pyzor-0.4.0.tar.bz2) = 7cA8CEaVEohQ94jFdVS5Z9vO/Y4=
SHA1 (pyzor-0.4.0.tar.bz2) = BXfSkSQKemZJLCtsu/Sf1e2vAv0=
SHA256 (pyzor-0.4.0.tar.bz2) = J7V/SF7pSpizb12Fz38qYb3/NfxABZRv5TZ87QNb1DA=
SIZE (pyzor-0.4.0.tar.bz2) = 41764

View File

@ -0,0 +1,477 @@
$OpenBSD: patch-lib_pyzor_client_py,v 1.1.1.1 2008/09/02 12:33:17 jasper Exp $
- Allow pyzor to scan mailboxes. From Debian.
- Handle unknown encodings. Even binary parts are handled now.
Treat empty type as 'text'. From FreeBSD.
--- lib/pyzor/client.py.orig Tue Sep 2 12:48:16 2008
+++ lib/pyzor/client.py Tue Sep 2 12:52:33 2008
@@ -8,6 +8,7 @@ import cStringIO
import getopt
import tempfile
import mimetools
+import multifile
import sha
import pyzor
@@ -58,11 +59,6 @@ class Client(object):
self.send(msg, address)
return self.read_response(msg.get_thread())
- def shutdown(self, address):
- msg = ShutdownRequest()
- self.send(msg, address)
- return self.read_response(msg.get_thread())
-
def build_socket(self):
self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
@@ -132,39 +128,50 @@ class ExecCall(object):
def run(self):
debug = 0
- (options, args) = getopt.getopt(sys.argv[1:], 'dh:', ['homedir='])
- if len(args) < 1:
- self.usage()
-
specified_homedir = None
+ options = None
+ log = None
+
+ try:
+ (options, args) = getopt.getopt(sys.argv[1:], 'd', ['homedir=', 'log'])
+ except getopt.GetoptError:
+ self.usage()
+ if len(args) < 1:
+ self.usage()
+
for (o, v) in options:
if o == '-d':
debug = 1
- elif o == '-h':
- self.usage()
elif o == '--homedir':
specified_homedir = v
+ elif o == '--log':
+ log = 1
self.output = Output(debug=debug)
-
homedir = pyzor.get_homedir(specified_homedir)
-
+
+ if log:
+ sys.stderr = open(homedir + "/pyzor.log", 'a')
+ sys.stderr.write("\npyzor[" + repr (os.getpid()) + "]:\n")
+
config = pyzor.Config(homedir)
config.add_section('client')
- defaults = {'ServersFile': 'servers',
+ defaults = {'ServersFile': 'servers',
'DiscoverServersURL': ServerList.inform_url,
- 'AccountsFile' : 'accounts',
+ 'AccountsFile': 'accounts',
+ 'Timeout': str(Client.timeout),
}
for k, v in defaults.items():
config.set('client', k, v)
-
+
config.read(os.path.join(homedir, 'config'))
servers_fn = config.get_filename('client', 'ServersFile')
-
+ Client.timeout = config.getint('client', 'Timeout')
+
if not os.path.exists(homedir):
os.mkdir(homedir)
@@ -197,10 +204,13 @@ class ExecCall(object):
def usage(self, s=None):
if s is not None:
sys.stderr.write("%s\n" % s)
- sys.stderr.write("""usage: %s [-d] [--homedir dir] command [cmd_opts]
+ sys.stderr.write("""
+usage: %s [-d] [--homedir dir] command [cmd_opts]
command is one of: check, report, discover, ping, digest, predigest,
- genkey, shutdown
+ genkey
+
Data is read on standard input (stdin).
+
"""
% sys.argv[0])
sys.exit(2)
@@ -208,9 +218,9 @@ Data is read on standard input (stdin).
def ping(self, args):
- getopt.getopt(args[1:], '')
-
- if len(args) > 1:
+ try:
+ getopt.getopt(args[1:], '')
+ except getopt.GetoptError:
self.usage("%s does not take any non-option arguments" % args[0])
runner = ClientRunner(self.client.ping)
@@ -221,30 +231,23 @@ Data is read on standard input (stdin).
return runner.all_ok
- def shutdown(self, args):
- (opts, args2) = getopt.getopt(args[1:], '')
-
- if len(args2) > 1:
+ def info(self, args):
+ try:
+ (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+ except getopt.GetoptError:
self.usage("%s does not take any non-option arguments" % args[0])
- runner = ClientRunner(self.client.shutdown)
+ do_mbox = 'msg'
- for arg in args2:
- server = Address.from_str(arg)
- runner.run(server, (server,))
-
- return runner.all_ok
+ for (o, v) in options:
+ if o == '--mbox':
+ do_mbox = 'mbox'
-
- def info(self, args):
- getopt.getopt(args[1:], '')
-
- if len(args) > 1:
- self.usage("%s does not take any non-option arguments" % args[0])
-
runner = InfoClientRunner(self.client.info)
- for digest in FileDigester(sys.stdin, self.digest_spec):
+ for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+ if digest is None:
+ continue
for server in self.servers:
response = runner.run(server, (digest, server))
@@ -252,34 +255,45 @@ Data is read on standard input (stdin).
def check(self, args):
- getopt.getopt(args[1:], '')
-
- if len(args) > 1:
+ try:
+ (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+ except getopt.GetoptError:
self.usage("%s does not take any non-option arguments" % args[0])
+ do_mbox = 'msg'
+
+ for (o, v) in options:
+ if o == '--mbox':
+ do_mbox = 'mbox'
+
runner = CheckClientRunner(self.client.check)
- for digest in FileDigester(sys.stdin, self.digest_spec):
+ for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+ if digest is None:
+ continue
for server in self.servers:
- response = runner.run(server, (digest, server))
+ runner.run(server, (digest, server))
return (runner.found_hit and not runner.whitelisted)
def report(self, args):
- (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
- do_mbox = False
-
- if len(args2) > 1:
+ try:
+ (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+ except getopt.GetoptError:
self.usage("%s does not take any non-option arguments" % args[0])
+ do_mbox = 'msg'
+
for (o, v) in options:
if o == '--mbox':
- do_mbox = True
+ do_mbox = 'mbox'
all_ok = True
- for digest in FileDigester(sys.stdin, self.digest_spec, do_mbox):
+ for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+ if digest is None:
+ continue
if not self.send_digest(digest, self.digest_spec,
self.client.report):
all_ok = False
@@ -302,20 +316,22 @@ Data is read on standard input (stdin).
def whitelist(self, args):
- (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
-
- if len(args2) > 1:
+ try:
+ (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+ except getopt.GetoptError:
self.usage("%s does not take any non-option arguments" % args[0])
- do_mbox = False
+ do_mbox = 'msg'
for (o, v) in options:
if o == '--mbox':
- do_mbox = True
+ do_mbox = 'mbox'
all_ok = True
- for digest in FileDigester(sys.stdin, self.digest_spec, do_mbox):
+ for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+ if digest is None:
+ continue
if not self.send_digest(digest, self.digest_spec,
self.client.whitelist):
all_ok = False
@@ -324,28 +340,29 @@ Data is read on standard input (stdin).
def digest(self, args):
- (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
-
- if len(args2) > 1:
+ try:
+ (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+ except getopt.GetoptError:
self.usage("%s does not take any non-option arguments" % args[0])
+ do_mbox = 'msg'
- do_mbox = False
-
for (o, v) in options:
if o == '--mbox':
- do_mbox = True
+ do_mbox = 'mbox'
- for digest in FileDigester(sys.stdin, self.digest_spec, do_mbox):
+ for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+ if digest is None:
+ continue
sys.stdout.write("%s\n" % digest)
return True
def print_digested(self, args):
- getopt.getopt(args[1:], '')
-
- if len(args) > 1:
+ try:
+ getopt.getopt(args[1:], '')
+ except getopt.GetoptError:
self.usage("%s does not take any non-option arguments" % args[0])
def loop():
@@ -358,9 +375,9 @@ Data is read on standard input (stdin).
return True
def genkey(self, args):
- getopt.getopt(args[1:], '')
-
- if len(args) > 1:
+ try:
+ getopt.getopt(args[1:], '')
+ except getopt.GetoptError:
self.usage("%s does not take any non-option arguments" % args[0])
import getpass
@@ -414,7 +431,6 @@ Data is read on standard input (stdin).
'report': report,
'ping' : ping,
'genkey': genkey,
- 'shutdown': shutdown,
'info': info,
'whitelist': whitelist,
'digest': digest,
@@ -466,7 +482,7 @@ class DataDigester(object):
(fp, offsets) = self.get_line_offsets(fp)
- # did we get an empty file?
+ # did we get an empty (parsed output) file?
if len(offsets) == 0:
return
@@ -608,32 +624,38 @@ class PrintingDataDigester(DataDigester):
-class FileDigester(BasicIterator):
- __slots__ = ['digester']
-
- def __init__(self, fp, spec, mbox=False):
- self.digester = iter(get_file_digester(fp, spec, mbox))
- self.output = pyzor.Output()
-
- def next(self):
- digest = self.digester.next()
- self.output.debug("calculated digest: %s" % digest)
- return digest
-
-
-
-def get_file_digester(fp, spec, mbox, seekable=False):
+def get_input_handler(fp, spec, style='msg', seekable=False):
"""Return an object that can be iterated over
to get all the digests from fp according to spec.
mbox is a boolean"""
- if mbox:
+ if style == 'msg':
+ return filter(lambda x: x is not None,
+ (DataDigester(rfc822BodyCleaner(fp),
+ spec, seekable).get_digest(),)
+ )
+
+ elif style =='mbox':
return MailboxDigester(fp, spec)
- return (DataDigester(rfc822BodyCleaner(fp),
- spec, seekable).get_digest(),)
+ elif style == 'digests':
+ return JustDigestsIterator(fp)
+ raise ValueError, "unknown input style"
+class JustDigestsIterator(BasicIterator):
+ __slots__ = ['fp']
+
+ def __init__(self, fp):
+ self.fp = fp
+
+ def next(self):
+ l = fp.readline()
+ if not l:
+ raise StopIteration
+ return l.rstrip()
+
+
class MailboxDigester(BasicIterator):
__slots__ = ['mbox', 'digest_spec', 'seekable']
@@ -645,7 +667,12 @@ class MailboxDigester(BasicIterator):
self.seekable = seekable
def next(self):
- next_msg = self.mbox.next()
+ try:
+ next_msg = self.mbox.next()
+ except IOError:
+ print "Error: Please feed mailbox files in on stdin, i.e."
+ print " pyzor digest --mbox < my_mbox_file"
+ next_msg = None
if next_msg is None:
raise StopIteration
return DataDigester(next_msg, self.digest_spec,
@@ -662,39 +689,70 @@ class rfc822BodyCleaner(BasicIterator):
self.multifile = None
self.curfile = None
+ # Check if we got a mail or not. Set type to binary if there is no 'From:' header and
+ # type text/plain with encoding 7bit. 7bit is passed trough anyway so nobody cares.
+ if (not msg.has_key("From") and self.type == 'text' and msg.subtype == 'plain' and msg.getencoding() == '7bit'):
+ self.type = 'binary';
+
+ if self.type is '':
+ self.type = 'text';
+
if self.type == 'text':
encoding = msg.getencoding()
- if encoding == '7bit':
- self.curfile = msg.fp
- else:
- self.curfile = tempfile.TemporaryFile()
- mimetools.decode(msg.fp, self.curfile, encoding)
- self.curfile.seek(0)
-
+ self.curfile = msg.fp
+ if encoding != '7bit':
+ # fix bad encoding name
+ if encoding == '8bits':
+ encoding = '8bit'
+ try:
+ newcurfile = tempfile.TemporaryFile()
+ mimetools.decode(msg.fp, newcurfile, encoding)
+ newcurfile.seek(0)
+ self.curfile = newcurfile
+ except:
+ # ignore encoding on errors, pass msg as is
+ pass
+
elif self.type == 'multipart':
import multifile
self.multifile = multifile.MultiFile(msg.fp, seekable=False)
self.multifile.push(msg.getparam('boundary'))
- self.multifile.next()
- self.curfile = self.__class__(self.multifile)
+ try:
+ self.multifile.next()
+ self.curfile = self.__class__(self.multifile)
+ except:
+ #
+ # Catch multipart decoding errors
+ #
+ fp.seek(0)
+ self.curfile = fp
+ self.type = 'binary'
if self.type == 'text' or self.type == 'multipart':
assert self.curfile is not None
+ elif self.type == 'binary':
+ try:
+ fp.seek(0)
+ except:
+ pass
+ self.curfile = fp
else:
assert self.curfile is None
def readline(self):
l = ''
- if self.type in ('text', 'multipart'):
- l = self.curfile.readline()
-
- if self.type == 'multipart' and not l and self.multifile.next():
- self.curfile = self.__class__(self.multifile)
- # recursion. Could get messy if
- # we get a bunch of empty multifile parts
- l = self.readline()
+ try:
+ if self.type in ('text', 'multipart', 'binary'):
+ l = self.curfile.readline()
+ if self.type == 'multipart' and not l and self.multifile.next():
+ self.curfile = self.__class__(self.multifile)
+ # recursion. Could get messy if
+ # we get a bunch of empty multifile parts
+ l = self.readline()
+ except (TypeError, multifile.Error):
+ pass
return l

View File

@ -0,0 +1,9 @@
$OpenBSD: patch-scripts_pyzor,v 1.1.1.1 2008/09/02 12:33:17 jasper Exp $
--- scripts/pyzor.orig Tue Sep 2 12:55:20 2008
+++ scripts/pyzor Tue Sep 2 12:55:37 2008
@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!${MODPY_BIN}
import pyzor.client
pyzor.client.run()

View File

@ -0,0 +1,9 @@
$OpenBSD: patch-scripts_pyzord,v 1.1.1.1 2008/09/02 12:33:17 jasper Exp $
--- scripts/pyzord.orig Tue Sep 2 12:55:24 2008
+++ scripts/pyzord Tue Sep 2 12:55:38 2008
@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!${MODPY_BIN}
import os
import os.path

7
mail/pyzor/pkg/DESCR Normal file
View File

@ -0,0 +1,7 @@
Pyzor is a collaborative, networked system to detect and block spam
using identifying digests of messages.
Pyzor initially started out to be merely a Python implementation of
Razor, but due to the protocol and the fact that Razor's server is not
Free Software, I decided to Pyzor should be based upon a new, lighter,
more language- independent protocol.

13
mail/pyzor/pkg/PLIST Normal file
View File

@ -0,0 +1,13 @@
@comment $OpenBSD: PLIST,v 1.1.1.1 2008/09/02 12:33:17 jasper Exp $
bin/pyzor
bin/pyzord
lib/python${MODPY_VERSION}/site-packages/pyzor/
lib/python${MODPY_VERSION}/site-packages/pyzor-0.4.0-py${MODPY_VERSION}.egg-info
lib/python${MODPY_VERSION}/site-packages/pyzor/__init__.py
lib/python${MODPY_VERSION}/site-packages/pyzor/__init__.pyc
lib/python${MODPY_VERSION}/site-packages/pyzor/client.py
lib/python${MODPY_VERSION}/site-packages/pyzor/client.pyc
lib/python${MODPY_VERSION}/site-packages/pyzor/server.py
lib/python${MODPY_VERSION}/site-packages/pyzor/server.pyc
share/doc/pyzor/
share/doc/pyzor/usage.html