# -*- coding: utf-8 -*-
# Provenance: rst2atom.py as added by commit
# 30e2b29161ab285e2ff453ceb408fb4fefcd96ff ("Initial commit",
# Thomas Baruchel, Sat Nov 5 00:19:53 2022 +0100).
"""Render a reStructuredText file to HTML and list its outermost sections.

The script converts ``filename`` to HTML with docutils, prints the document
title, and then, for every ``<div>`` of the body that has no ``<div>``
ancestor, prints a separator line, the ``<div>`` itself, and the first date
found in the HTML comments located under that ``<div>``.
"""

import bs4
import dateutil.parser
import docutils.core

# reStructuredText input file processed by this script.
filename = "test.rst"

# https://docutils.sourceforge.io/docs/api/publisher.html
# The file is read inside a ``with`` block so the handle is closed
# deterministically, and decoded explicitly as UTF-8 instead of relying on
# the locale's default encoding.
with open(filename, mode="r", encoding="utf-8") as source:
    html = docutils.core.publish_parts(
        source.read(),
        source_path=filename,
        writer_name="html",
    )


def find_date(d):
    """Return the first date found in the HTML comments under *d*.

    Every comment node below *d* is split on single spaces and each
    resulting word is handed to ``dateutil.parser.parse``; the first word
    that parses wins.

    Args:
        d: A BeautifulSoup element whose descendants are searched.

    Returns:
        The ``datetime.datetime`` parsed from the first acceptable word, or
        ``None`` when no word of any comment can be parsed.

    Side effects:
        Each comment that is examined is detached from the tree through
        ``extract()``; comments located after the one that yields a date
        are left in place.
    """
    comments = d.find_all(string=lambda text: isinstance(text, bs4.Comment))
    for c in comments:
        # NOTE(review): extract() removes the comment from the document as
        # well as returning it. Kept as in the original -- confirm that the
        # removal is intended and not just a way to read the comment text.
        for k in c.extract().split(" "):
            try:
                # NOTE(review): dateutil is lenient (a bare number such as
                # "12" is accepted), so the first parseable word of a
                # comment is taken as the date -- confirm this is acceptable.
                return dateutil.parser.parse(k)
            except (dateutil.parser.ParserError, OverflowError):
                # ParserError: the word is not a date (this includes the
                # empty strings produced by consecutive spaces).
                # OverflowError: the word is a number too large for a date
                # field; it must not abort the whole scan.
                continue
    return None


print("TITRE:", html["title"])

soup = bs4.BeautifulSoup(html["body"], "html.parser")
divs = soup.select("div")

for d in divs:
    # don't handle subsections: skip any <div> nested inside another <div>
    if d.find_parent("div"):
        continue

    print("=" * 40)
    print(d)
    print(find_date(d))