diff --git a/rst2atom.py b/rst2atom.py index 0aef9c5..b765cc1 100644 --- a/rst2atom.py +++ b/rst2atom.py @@ -6,31 +6,89 @@ filename="test.rst" import docutils.core import dateutil.parser +import datetime import bs4 html = docutils.core.publish_parts(open(filename, mode="r").read(), source_path=filename, writer_name='html') + +def extract_docinfos(html): + soup1 = bs4.BeautifulSoup(html["docinfo"], 'html.parser') + return { tr.find("th").text: + tr.find("td").text + for tr in [ + e.find_parent("tr") for e in + soup1.find_all("th", {"class": "docinfo-name"}) + ] } + + def find_date(d): + """ + Parse the comments in a
section and return an ISO 8601 formatted + string being the first recognized date string. + Raise dateutil.parser.ParserError if no such comment contains a date. + + In reStructuredText, the date would typically be written as: + + This is a new entry + ~~~~~~~~~~~~~~~~~~~ + .. Note: created on 2022/11/05 + + Blah... + """ comments = d.find_all(string=lambda text: isinstance(text, bs4.Comment)) for c in comments: for k in c.extract().split(" "): try: t = dateutil.parser.parse(k) - return t + return t.isoformat() except dateutil.parser.ParserError: pass + raise dateutil.parser.ParserError +def build_entry(d): + """ + Print on stdout an Atom section built from the
. + """ + print(""" + Atom-Powered Robots Run Amok + + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-11-09T17:23:02Z + +
+

This is the entry content.

+
+
+ + John Doe + johndoe@example.com + +
+ """) + +print(""" + + %s + %s + + + urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6 +""" % ( + html["title"], + datetime.datetime.now().isoformat() +)) +docinfos = extract_docinfos(html) - -print("TITRE:", html["title"]) - -soup = bs4.BeautifulSoup(html["body"], 'html.parser') -divs = soup.select("div") +soup2 = bs4.BeautifulSoup(html["body"], 'html.parser') +divs = soup2.select("div") for d in divs: # don't handle subsections @@ -38,4 +96,12 @@ for d in divs: print("="*40) print(d) - print(find_date(d)) + try: + print(find_date(d)) + except dateutil.parser.ParserError: + pass + + + + +print("")