Allow an exclusions file
This commit is contained in:
parent
21b3f80dc3
commit
7b47979202
@ -129,7 +129,7 @@ def jargonCreateEntry(title, text, outputDir):
|
|||||||
filename = outputDir
|
filename = outputDir
|
||||||
if not outputDir.endswith('/'):
|
if not outputDir.endswith('/'):
|
||||||
filename = filename + '/'
|
filename = filename + '/'
|
||||||
filename = filename + jargonSaneTitle(title) + '.txt'
|
filename = filename + title + '.txt'
|
||||||
|
|
||||||
# don't overwrite existing files
|
# don't overwrite existing files
|
||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
@ -140,7 +140,7 @@ def jargonCreateEntry(title, text, outputDir):
|
|||||||
fp.close
|
fp.close
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
def jargonReadFile(filename, outputDir):
|
def jargonReadFile(filename, exclusions, outputDir):
|
||||||
inFile = open(filename)
|
inFile = open(filename)
|
||||||
buffer = ""
|
buffer = ""
|
||||||
for line in inFile:
|
for line in inFile:
|
||||||
@ -151,16 +151,31 @@ def jargonReadFile(filename, outputDir):
|
|||||||
parser.bodyText is not '' and \
|
parser.bodyText is not '' and \
|
||||||
len(parser.title) > 1:
|
len(parser.title) > 1:
|
||||||
saneBodyText = jargonSaneText(parser.title, parser.bodyText)
|
saneBodyText = jargonSaneText(parser.title, parser.bodyText)
|
||||||
|
parser.title = jargonSaneTitle(parser.title)
|
||||||
|
if not parser.title in exclusions:
|
||||||
print jargonCreateEntry(parser.title, saneBodyText, outputDir)
|
print jargonCreateEntry(parser.title, saneBodyText, outputDir)
|
||||||
#if saneBodyText == "":
|
|
||||||
#print "Title: " + parser.title
|
# read original jargon file entries to be excluded
|
||||||
# print "Original: " + parser.bodyText
|
def jargonReadExclusions(filename):
|
||||||
#print "Text: " + saneBodyText + "\n"
|
if len(filename) == 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
if not os.path.isfile(filename):
|
||||||
|
return []
|
||||||
|
|
||||||
|
exclusions = []
|
||||||
|
with open(filename) as fp:
|
||||||
|
exclusions = fp.readlines()
|
||||||
|
fp.close()
|
||||||
|
|
||||||
|
return exclusions
|
||||||
|
|
||||||
def jargonImport(rootDir, excludeEntriesFilename, outputDir):
|
def jargonImport(rootDir, excludeEntriesFilename, outputDir):
|
||||||
|
exclusions = jargonReadExclusions(excludeEntriesFilename)
|
||||||
|
|
||||||
for dirName, subdirList, fileList in os.walk(rootDir):
|
for dirName, subdirList, fileList in os.walk(rootDir):
|
||||||
for filename in fileList:
|
for filename in fileList:
|
||||||
jargonReadFile(dirName + '/' + filename, outputDir)
|
jargonReadFile(dirName + '/' + filename, exclusions, outputDir)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
jargonImport('../original','','../entries')
|
jargonImport('../original','','../entries')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user