%s

section and return an ISO8601 formatted string being the first recognized
    date string.

    Raise dateutil.parser.ParserError if no such comment contains a date.

    In the RestructuredText, the date would typically be written as:

        This is a new entry
        ~~~~~~~~~~~~~~~~~~~

        .. Note: created on 2022/11/05

        Blah...
    """
    # Keep only the comment nodes found under d.
    comments = d.find_all(string=lambda text: isinstance(text, bs4.Comment))
    for c in comments:
        # extract() detaches the comment from the tree and returns it, so
        # every comment visited here is removed from d as a side effect.
        # Each space-separated token is tried on its own: a date must be a
        # single token (e.g. 2022/11/05) to be recognized.
        for k in c.extract().split(" "):
            try:
                t = dateutil.parser.parse(k)
                return t.isoformat()
            except dateutil.parser.ParserError:
                # Not a date: try the next token.
                pass
    raise dateutil.parser.ParserError


def build_entry(d):
    """
    Print on stdout an Atom section built from the element ``d``.

    The text of the first ``h1`` found in ``d``, the date returned by
    find_date(d) and ``d`` itself are substituted into the template.
    When find_date() raises dateutil.parser.ParserError the date is left
    as an empty string.
    """
    # Get the date
    mydate = ""
    try:
        mydate = find_date(d)
    except dateutil.parser.ParserError:
        pass

    # NOTE(review): the template below holds two %s placeholders while three
    # values are supplied, so the % operator raises TypeError as written.
    # The markup that surrounded the placeholders looks to have been lost
    # from this literal -- restore it before running.
    # NOTE(review): the urn:uuid is a fixed literal, so every entry printed
    # carries the same value -- confirm this is intended.
    # NOTE(review): d.find("h1") is dereferenced unconditionally; presumably
    # every element passed in has an h1 -- verify against the caller.
    print(""" %s urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a %s

""" % (
        d.find("h1").text,
        mydate,
        d
    ))


# html, extract_docinfos, extract_meta, generator_uri and generator_name are
# defined outside this part of the file.
docinfos = extract_docinfos(html)
meta = extract_meta(html)
soup2 = bs4.BeautifulSoup(html["body"], 'html.parser')
divs = soup2.select("div")

# NOTE(review): seven %s placeholders but ten values are supplied below, so
# this raises TypeError as written; the markup of this template looks to have
# been lost as well -- restore it before running.
# datetime.datetime.now() is called without a tz argument, so the timestamp
# is naive local time and its isoformat() carries no UTC offset.
print(""" %s %s %s %s %s %s %s """ % (
    meta["original-source"],
    html["title"],
    docinfos["Author:"],
    docinfos["Contact:"],
    meta["copyright"],
    datetime.datetime.now().isoformat(),
    meta["original-source"],
    meta["syndication-source"],
    generator_uri,
    generator_name
))
for d in divs:
    # don't handle subsections
    if d.find_parent("div"):
        continue
    build_entry(d)
# NOTE(review): assumed to run once after the loop (statement layout was lost
# in this copy of the file) -- confirm.
print("")