Initial commit
This commit is contained in:
commit
30e2b29161
41
rst2atom.py
Normal file
41
rst2atom.py
Normal file
@ -0,0 +1,41 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Path of the reStructuredText document that this script converts.
filename = "test.rst"
|
||||
|
||||
# https://docutils.sourceforge.io/docs/api/publisher.html
|
||||
|
||||
import docutils.core
|
||||
import dateutil.parser
|
||||
import bs4
|
||||
|
||||
# Render the reStructuredText source to HTML parts with docutils. The file is
# read inside a ``with`` block so the handle is closed as soon as its content
# has been consumed instead of being left to the garbage collector.
# NOTE(review): the file is decoded with the platform default encoding, as in
# the original code -- confirm whether an explicit encoding is wanted.
with open(filename, mode="r") as source_file:
    html = docutils.core.publish_parts(source_file.read(),
                                       source_path=filename,
                                       writer_name='html')
|
||||
|
||||
def find_date(d):
    """Return the first date found in the HTML comments below *d*.

    Each comment node under *d* is examined in document order: its text is
    split on single spaces and every token is handed to
    ``dateutil.parser.parse``. The first token that parses wins.

    Side effect: every comment that gets examined is detached from the tree
    with ``extract()``. Comments located after the one holding the date are
    never examined and therefore stay in place.

    Args:
        d: A BeautifulSoup element (anything exposing ``find_all``).

    Returns:
        The value returned by ``dateutil.parser.parse`` for the first
        parseable token, or ``None`` when no comment contains one.
    """
    comments = d.find_all(string=lambda text: isinstance(text, bs4.Comment))
    for c in comments:
        # extract() removes the comment from the tree and returns it, so the
        # comment text is still available for tokenising.
        for k in c.extract().split(" "):
            try:
                return dateutil.parser.parse(k)
            except (dateutil.parser.ParserError, OverflowError):
                # ParserError: the token is not a date. OverflowError: the
                # token is numeric but too large for a date. Either way this
                # token is skipped and the scan carries on.
                continue
    # No comment contained a parseable date.
    return None
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Show the document title extracted by docutils.
print("TITRE:", html["title"])

# Parse the rendered body and collect every <div> it contains.
soup = bs4.BeautifulSoup(html["body"], 'html.parser')
divs = soup.select("div")

for d in divs:
    # A <div> nested inside another <div> is a subsection: skip it so that
    # only the outermost sections are reported.
    if d.find_parent("div") is not None:
        continue

    print("=" * 40)
    print(d)
    print(find_date(d))
|
Loading…
Reference in New Issue
Block a user