This commit is contained in:
Thomas Baruchel 2022-11-05 12:13:14 +01:00
parent 5c09e23172
commit c21562a04a
1 changed files with 39 additions and 11 deletions

View File

@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
# Name of a reStructuredText file -- presumably the input document turned
# into the feed; where it is read is outside the visible part of this file.
filename = "test.rst"

# Identity of this tool, written into the feed's <generator> element.
generator_name = "rst2atom"
generator_uri = "http://git.sdf.org/rst2atom.py"
# https://docutils.sourceforge.io/docs/api/publisher.html
@ -27,6 +29,16 @@ def extract_docinfos(html):
soup1.find_all("th", {"class": "docinfo-name"})
] }
def extract_meta(html):
    """
    Collect the metadata of the RsT document from the publish_parts
    dictionary.

    The markup stored under html["meta"] is parsed and every <meta> tag
    carrying both a "name" and a "content" attribute is kept.  The result
    maps each tag's name to its content; when a name occurs several times
    the last occurrence wins.
    """
    parsed = bs4.BeautifulSoup(html["meta"], 'html.parser')
    required_attrs = {"name": True, "content": True}
    metadata = {}
    for tag in parsed.find_all("meta", required_attrs):
        metadata[tag.attrs["name"]] = tag.attrs["content"]
    return metadata
def find_date(d):
"""
@ -76,20 +88,36 @@ def build_entry(d):
</entry>
""")
# Emit the Atom feed header: XML declaration, opening <feed> tag, title,
# last-update time, the two links and the feed id.
#
# Atom date constructs must be RFC 3339 date-times, which require an explicit
# UTC offset; a naive datetime.now().isoformat() has none, so the timestamp
# is taken as a timezone-aware UTC value instead.
print("""<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>%s</title>
<updated>%s</updated>
<link href="http://example.org/feed/" rel="self" />
<link href="http://example.org/" />
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
""" % (
    html["title"],
    datetime.datetime.now(datetime.timezone.utc).isoformat()
))
# Standard-library helper, imported here so this section is self-contained.
from xml.sax.saxutils import escape

# Feed-level metadata: the docinfo fields and the <meta> name/content pairs
# of the rendered document.
docinfos = extract_docinfos(html)
meta = extract_meta(html)


def _escape_attr(value):
    """Escape `value` for use inside a double-quoted XML attribute."""
    return escape(value, {'"': "&quot;"})


# Emit the Atom feed header.  A KeyError raised below means the document
# lacks the corresponding docinfo field or <meta> entry.
#
# - <meta> attribute values come back from BeautifulSoup with entities
#   already decoded, so they are escaped again before being placed in XML
#   text or attribute positions; otherwise "&", "<" or '"' in a URL or in
#   the copyright notice would produce a malformed feed.
# - <updated> must be an RFC 3339 date-time with an explicit UTC offset,
#   hence the timezone-aware timestamp.
# - NOTE(review): html["title"] and the docinfos values are passed through
#   unchanged because their producers are not visible here -- confirm
#   whether they are already markup-encoded or need escaping as well.
print("""<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<id>%s</id>
<title>%s</title>
<author>
<name>%s</name>
<email>%s</email>
</author>
<rights>%s</rights>
<updated>%s</updated>
<link href="%s" />
<link href="%s" rel="self" />
<generator uri="%s">%s</generator>
""" % (
    escape(meta["original-source"]),
    html["title"],
    docinfos["Author:"],
    docinfos["Contact:"],
    escape(meta["copyright"]),
    datetime.datetime.now(datetime.timezone.utc).isoformat(),
    _escape_attr(meta["original-source"]),
    _escape_attr(meta["syndication-source"]),
    generator_uri, generator_name
))
# Parse the rendered document body and collect every <div> element in it.
soup2 = bs4.BeautifulSoup(html["body"], 'html.parser')
divs = soup2.select("div")