feeds/rst2atom.py

42 lines
916 B
Python
Raw Normal View History

2022-11-04 23:19:53 +00:00
# -*- coding: utf-8 -*-
# Path of the reStructuredText document this script renders and inspects.
filename = "test.rst"
# https://docutils.sourceforge.io/docs/api/publisher.html
import docutils.core
import dateutil.parser
import bs4
# Render the reST source into its HTML parts; the script later reads the
# "title" and "body" entries of the result. The file is read inside a
# ``with`` block so the handle is closed as soon as its contents are in
# memory instead of being left open for the rest of the run.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used, exactly as before — confirm whether UTF-8 should be forced.
with open(filename, mode="r") as rst_file:
    rst_source = rst_file.read()
html = docutils.core.publish_parts(rst_source,
                                   source_path=filename,
                                   writer_name='html')
def find_date(d):
    """Return the first date found in the HTML comments under *d*.

    Every comment node below ``d`` is split on whitespace and each token is
    handed to ``dateutil.parser.parse``; the first token that parses wins.

    Args:
        d: A BeautifulSoup node exposing ``find_all`` (the script passes a
            top-level ``div``).

    Returns:
        The value returned by ``dateutil.parser.parse`` for the first token
        that parses, or ``None`` when no comment token parses as a date.
    """
    comments = d.find_all(string=lambda text: isinstance(text, bs4.Comment))
    for comment in comments:
        # Read the comment text in place. Calling extract() here would
        # detach the comment from the tree, so a mere lookup would modify
        # ``d`` and a second call on the same node would find nothing.
        # split() without an argument also breaks on newlines and tabs and
        # drops the empty tokens that repeated spaces would produce.
        for token in comment.split():
            try:
                return dateutil.parser.parse(token)
            except (dateutil.parser.ParserError, OverflowError):
                # Not a date (or a number too large to be one): keep looking.
                continue
    return None
# Show the document title reported by docutils.
print("TITRE:", html["title"])

# Parse the rendered body so that each <div> can be inspected on its own.
soup = bs4.BeautifulSoup(html["body"], 'html.parser')
divs = soup.select("div")
separator = "=" * 40
for d in divs:
    # Only top-level divs are reported: a div nested inside another div is
    # a subsection and is deliberately not handled.
    if not d.find_parent("div"):
        print(separator)
        # The markup is printed before the date lookup runs on this div.
        print(d)
        print(find_date(d))