# -*- coding: utf-8 -*-
"""Render a reStructuredText file to HTML and list its top-level sections.

For every ``<div>`` of the rendered body that is not nested inside another
``<div>``, the script prints a separator line, the div's HTML, and the first
date that can be parsed out of the HTML comments found inside that div.

Usage::

    python rst2atom.py [FILE.rst]

When no file is given, ``test.rst`` is read.
"""

# Provenance: reconstructed from the git patch "Initial commit"
# (30e2b29161ab285e2ff453ceb408fb4fefcd96ff) by Thomas Baruchel,
# Sat, 5 Nov 2022 00:19:53 +0100, which created rst2atom.py.

import sys

import bs4
import dateutil.parser
import docutils.core

# https://docutils.sourceforge.io/docs/api/publisher.html

filename = "test.rst"


def find_date(d):
    """Return the first date found in the HTML comments below *d*.

    Each comment is split on single spaces and every piece is handed to
    ``dateutil.parser.parse``; the first piece that parses wins.

    Side effect: every comment examined (up to and including the one that
    yields a date) is detached from the tree with ``extract()``.
    NOTE(review): presumably this is meant to keep the date marker out of
    later output -- confirm before relying on it or removing it.

    Args:
        d: a BeautifulSoup element; only its ``find_all`` method is used.

    Returns:
        The parsed ``datetime.datetime``, or ``None`` when no piece of any
        comment can be parsed as a date.
    """
    comments = d.find_all(string=lambda text: isinstance(text, bs4.Comment))
    for comment in comments:
        for token in comment.extract().split(" "):
            try:
                return dateutil.parser.parse(token)
            except (ValueError, OverflowError):
                # ParserError is a ValueError subclass; OverflowError is
                # raised for numbers too large to be a date (for example a
                # long run of digits). Either way this token is not a date.
                continue
    return None


def main(argv=None):
    """Convert one reStructuredText file and print its sections and dates.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument, if present, is the path of
            the file to read; otherwise the module-level ``filename`` is
            used.
    """
    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else filename

    # Read explicitly as UTF-8 (matching the coding declaration above) and
    # close the file deterministically instead of leaking the handle.
    with open(path, mode="r", encoding="utf-8") as source:
        text = source.read()

    parts = docutils.core.publish_parts(
        text,
        source_path=path,
        writer_name="html",
    )

    print("TITRE:", parts["title"])

    soup = bs4.BeautifulSoup(parts["body"], "html.parser")
    for div in soup.select("div"):
        # don't handle subsections
        if div.find_parent("div"):
            continue

        print("=" * 40)
        print(div)
        print(find_date(div))


if __name__ == "__main__":
    main()