#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a reStructuredText document into an Atom feed printed on stdout.

Usage: rst2atom.py document.rst

The RsT document is expected to carry:
  * docinfo fields ``Author:`` and ``Contact:``;
  * ``meta`` directives ``original-source``, ``syndication-source``,
    ``description`` and ``copyright``;
  * one top-level section per feed entry, optionally containing a comment
    with the entry creation date (e.g. ``.. Note: created on 2022/11/05``).
"""

import sys
import datetime
from xml.sax.saxutils import escape

import docutils.core
import dateutil.parser
import bs4

# Advertised in the feed's <generator> element.
generator_uri = "https://git.sdf.org/baruchel/feeds/src/branch/master/rst2atom.py"
generator_name = "rst2atom"

filename = sys.argv[1]

# Render the whole document once; the parts dictionary is consumed by the
# helpers below.  Encoding is pinned so behavior is platform-independent,
# and the context manager guarantees the file handle is closed.
with open(filename, mode="r", encoding="utf-8") as f:
    html = docutils.core.publish_parts(f.read(),
                                       source_path=filename,
                                       writer_name='html')


def extract_docinfos(html):
    """
    Parse the publish_parts dictionary and return a dictionary containing
    the docinfos from the RsT document, keyed by field label
    (e.g. ``{"Author:": "...", "Contact:": "..."}``).
    """
    soup = bs4.BeautifulSoup(html["docinfo"], 'html.parser')
    return {tr.find("th").text: tr.find("td").text
            for tr in [e.find_parent("tr")
                       for e in soup.find_all("th",
                                              {"class": "docinfo-name"})]}


def extract_meta(html):
    """
    Parse the publish_parts dictionary and return a dictionary containing
    the metadata (``<meta name=... content=...>`` elements) from the RsT
    document.
    """
    soup = bs4.BeautifulSoup(html["meta"], 'html.parser')
    return {m.attrs["name"]: m.attrs["content"]
            for m in soup.find_all("meta", {"name": True, "content": True})}


def find_date(d):
    """
    Parse the comments in a <div> section and return an RFC 3339 formatted
    string being the first recognized date string.

    Raise dateutil.parser.ParserError if no such comment contains a date.

    In the RestructuredText, the date would be typically written as:

        This is a new entry
        ~~~~~~~~~~~~~~~~~~~

        .. Note: created on 2022/11/05

        Blah...
    """
    comments = d.find_all(string=lambda text: isinstance(text, bs4.Comment))
    for c in comments:
        # .extract() also removes the comment node from the tree, so it does
        # not leak into the entry's HTML content later on.
        for token in c.extract().split(" "):
            try:
                return dateutil.parser.parse(token).isoformat() + "Z"
            except dateutil.parser.ParserError:
                pass
    raise dateutil.parser.ParserError("no date found in section comments")


def build_entry(d, base_url):
    """
    Print on stdout an Atom <entry> section built from the <div> of a
    top-level document section.

    The entry id is derived from base_url, the slugified section title and
    the entry date.
    """
    mydate = ""
    # NOTE(review): d.find("h1") returns None for a section without a title,
    # which would raise AttributeError here — assumed not to happen with
    # docutils-generated sections; confirm against real documents.
    mytitle = d.find("h1").text
    try:
        mydate = find_date(d)
    except dateutil.parser.ParserError:
        # Undated entry: keep an empty <updated> element rather than abort.
        pass
    entry_id = (base_url + "#" + mytitle.lower().replace(" ", "-")
                + "_(" + mydate + ")")
    # content type="html" requires the embedded markup to be XML-escaped.
    print("""<entry>
<id>%s</id>
<title>%s</title>
<updated>%s</updated>
<content type="html">
%s
</content>
</entry>""" % (escape(entry_id), escape(mytitle), mydate, escape(str(d))))


docinfos = extract_docinfos(html)
meta = extract_meta(html)

soup2 = bs4.BeautifulSoup(html["body"], 'html.parser')
divs = soup2.select("div")

# utcnow() is deprecated since Python 3.12; an aware "now" stripped of its
# offset yields the same naive ISO string, to which the explicit "Z" suffix
# is appended.
now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

# Feed header: 11 interpolated values, in the order id, title, subtitle,
# author name, author email, rights, updated, link, self link, generator
# uri, generator name.
print("""<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<id>%s</id>
<title>%s</title>
<subtitle>%s</subtitle>
<author><name>%s</name><email>%s</email></author>
<rights>%s</rights>
<updated>%s</updated>
<link href="%s"/>
<link rel="self" href="%s"/>
<generator uri="%s">%s</generator>""" % (
    escape(meta["original-source"]),
    html["title"],
    escape(meta["description"]),
    escape(docinfos["Author:"]),
    escape(docinfos["Contact:"]),
    escape(meta["copyright"]),
    now.isoformat() + "Z",
    escape(meta["original-source"]),
    escape(meta["syndication-source"]),
    escape(generator_uri),
    generator_name))

for d in divs:
    # don't handle subsections: only top-level <div> elements become entries
    if d.find_parent("div"):
        continue
    build_entry(d, meta["original-source"])

print("</feed>")