Update
This commit is contained in:
parent
5c09e23172
commit
c21562a04a
50
rst2atom.py
50
rst2atom.py
|
@ -1,6 +1,8 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
filename="test.rst"
|
filename="test.rst"
|
||||||
|
generator_uri = "http://git.sdf.org/rst2atom.py"
|
||||||
|
generator_name = "rst2atom"
|
||||||
|
|
||||||
# https://docutils.sourceforge.io/docs/api/publisher.html
|
# https://docutils.sourceforge.io/docs/api/publisher.html
|
||||||
|
|
||||||
|
@ -27,6 +29,16 @@ def extract_docinfos(html):
|
||||||
soup1.find_all("th", {"class": "docinfo-name"})
|
soup1.find_all("th", {"class": "docinfo-name"})
|
||||||
] }
|
] }
|
||||||
|
|
||||||
|
def extract_meta(html):
    """
    Collect the <meta> tags found in html["meta"] into a dictionary.

    The "meta" entry of ``html`` (presumably the parts dictionary produced
    by docutils' publish_parts for a reStructuredText document -- confirm
    against the caller) is parsed as HTML.  Only <meta> elements that carry
    both a ``name`` and a ``content`` attribute are considered.

    Returns a dict mapping each tag's ``name`` to its ``content``; when a
    name occurs more than once, the last occurrence wins.
    """
    parsed = bs4.BeautifulSoup(html["meta"], 'html.parser')
    # Requiring both attributes in the filter is what makes the direct
    # attrs[...] lookups below safe.
    required_attrs = {"name": True, "content": True}
    collected = {}
    for tag in parsed.find_all("meta", required_attrs):
        collected[tag.attrs["name"]] = tag.attrs["content"]
    return collected
|
||||||
|
|
||||||
|
|
||||||
def find_date(d):
|
def find_date(d):
|
||||||
"""
|
"""
|
||||||
|
@ -76,20 +88,36 @@ def build_entry(d):
|
||||||
</entry>
|
</entry>
|
||||||
""")
|
""")
|
||||||
|
|
||||||
print("""<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
||||||
<title>%s</title>
|
|
||||||
<updated>%s</updated>
|
|
||||||
<link href="http://example.org/feed/" rel="self" />
|
|
||||||
<link href="http://example.org/" />
|
|
||||||
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
|
|
||||||
""" % (
|
|
||||||
html["title"],
|
|
||||||
datetime.datetime.now().isoformat()
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
docinfos = extract_docinfos(html)
|
docinfos = extract_docinfos(html)
|
||||||
|
meta = extract_meta(html)
|
||||||
|
|
||||||
|
print("""<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>%s</id>
|
||||||
|
<title>%s</title>
|
||||||
|
<author>
|
||||||
|
<name>%s</name>
|
||||||
|
<email>%s</email>
|
||||||
|
</author>
|
||||||
|
<rights>%s</rights>
|
||||||
|
<updated>%s</updated>
|
||||||
|
<link href="%s" />
|
||||||
|
<link href="%s" rel="self" />
|
||||||
|
<generator uri="%s">%s</generator>
|
||||||
|
""" % (
|
||||||
|
meta["original-source"],
|
||||||
|
html["title"],
|
||||||
|
docinfos["Author:"],
|
||||||
|
docinfos["Contact:"],
|
||||||
|
meta["copyright"],
|
||||||
|
datetime.datetime.now().isoformat(),
|
||||||
|
meta["original-source"],
|
||||||
|
meta["syndication-source"],
|
||||||
|
generator_uri, generator_name
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
soup2 = bs4.BeautifulSoup(html["body"], 'html.parser')
|
soup2 = bs4.BeautifulSoup(html["body"], 'html.parser')
|
||||||
divs = soup2.select("div")
|
divs = soup2.select("div")
|
||||||
|
|
Loading…
Reference in New Issue