From be7b4bbc9513a1341273869c5321a7dbea8898a2 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Sun, 6 Apr 2014 11:06:23 +0100
Subject: [PATCH] Import html

---
 import/importjargon.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 import/importjargon.py

diff --git a/import/importjargon.py b/import/importjargon.py
new file mode 100644
index 0000000..9945be5
--- /dev/null
+++ b/import/importjargon.py
@@ -0,0 +1,39 @@
+"""Walk a directory tree and feed every file found to HTMLParser."""
+
+import os
+
+try:
+    import HTMLParser
+except ImportError:
+    # Python 3 renamed the module; keep the name the code below uses.
+    import html.parser as HTMLParser
+
+
+def jargonReadFile(filename):
+    """Read *filename* in full and feed its text to a new HTMLParser.
+
+    The base HTMLParser class is used as-is and nothing is returned, so
+    this only runs the file's contents through the parser.
+    """
+    # 'with' guarantees the handle is closed, even if reading raises.
+    with open(filename) as inFile:
+        contents = inFile.read()
+    parser = HTMLParser.HTMLParser()
+    parser.feed(contents)
+
+
+def jargonImport(rootDir):
+    """Print every directory and file under *rootDir* and parse each file.
+
+    Each file found by os.walk is passed to jargonReadFile.
+    """
+    for dirName, subdirList, fileList in os.walk(rootDir):
+        print('Found directory: %s' % dirName)
+        for filename in fileList:
+            print('\t%s' % filename)
+            # os.path.join uses the platform's separator instead of '/'.
+            jargonReadFile(os.path.join(dirName, filename))
+
+
+if __name__ == "__main__":
+    jargonImport('original')