Allow an exclusions file

This commit is contained in:
Bob Mottram 2014-04-26 15:11:49 +01:00
parent 21b3f80dc3
commit 7b47979202

View File

@ -129,7 +129,7 @@ def jargonCreateEntry(title, text, outputDir):
filename = outputDir
if not outputDir.endswith('/'):
filename = filename + '/'
filename = filename + jargonSaneTitle(title) + '.txt'
filename = filename + title + '.txt'
# don't overwrite existing files
if os.path.isfile(filename):
@ -140,7 +140,7 @@ def jargonCreateEntry(title, text, outputDir):
fp.close
return filename
def jargonReadFile(filename, outputDir):
def jargonReadFile(filename, exclusions, outputDir):
inFile = open(filename)
buffer = ""
for line in inFile:
@ -151,16 +151,31 @@ def jargonReadFile(filename, outputDir):
parser.bodyText is not '' and \
len(parser.title) > 1:
saneBodyText = jargonSaneText(parser.title, parser.bodyText)
parser.title = jargonSaneTitle(parser.title)
if not parser.title in exclusions:
print jargonCreateEntry(parser.title, saneBodyText, outputDir)
#if saneBodyText == "":
#print "Title: " + parser.title
# print "Original: " + parser.bodyText
#print "Text: " + saneBodyText + "\n"
# read original jargon file entries to be excluded
def jargonReadExclusions(filename):
    """Read the list of entry titles to exclude from the import.

    The file is expected to hold one title per line.  Each line is
    stripped of surrounding whitespace (including the trailing newline
    that readlines() would otherwise keep) so that the returned strings
    can be compared directly against entry titles with ``in``.  Blank
    lines are skipped.

    Args:
        filename: path of the exclusions file; may be empty or None.

    Returns:
        A list of title strings.  The list is empty when no filename is
        given or when the file does not exist.
    """
    if not filename:
        return []
    if not os.path.isfile(filename):
        return []
    exclusions = []
    # the with-statement closes the file; no explicit close() is needed
    with open(filename) as fp:
        for line in fp:
            title = line.strip()
            if title:
                exclusions.append(title)
    return exclusions
def jargonImport(rootDir, excludeEntriesFilename, outputDir):
    """Import every file found under rootDir as jargon entries.

    Args:
        rootDir: directory that is walked recursively for input files.
        excludeEntriesFilename: path of a file listing entry titles to
            skip; passed to jargonReadExclusions, which returns an empty
            list for an empty or missing file.
        outputDir: directory handed to jargonReadFile for the output.
    """
    # load the exclusions once rather than once per input file
    exclusions = jargonReadExclusions(excludeEntriesFilename)
    for dirName, subdirList, fileList in os.walk(rootDir):
        for filename in fileList:
            jargonReadFile(os.path.join(dirName, filename),
                           exclusions, outputDir)
if __name__ == "__main__":
    # Script entry point: import from ../original into ../entries,
    # with an empty exclusions filename.
    jargonImport('../original', '', '../entries')