mirror of
https://codeberg.org/mclemens/teletext-rss.git
synced 2024-09-27 08:55:57 -04:00
Lots of improvements (XML output, database layout, etc.)
This commit is contained in:
parent
34ba5b79f0
commit
6c8aca866a
91
teletext.py
91
teletext.py
@ -1,8 +1,8 @@
|
|||||||
import hashlib
import sqlite3
import sys
import textwrap
import time
from sqlite3 import Error
from xml.sax.saxutils import escape

import urllib3
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
def create_conn(db_file):
|
def create_conn(db_file):
|
||||||
@ -25,85 +25,98 @@ def create_table(conn, create_table_SQL):
|
|||||||
|
|
||||||
def insert_site(conn, site):
    """Insert one scraped teletext page into the ``sites`` table.

    Args:
        conn: Open database connection; ``cursor()``, ``commit()`` and
            ``rollback()`` are called on it.
        site: 5-tuple ``(unixtime, hash, tafel, description, title)`` that is
            bound positionally to the INSERT placeholders.

    Returns:
        The row id of the newly inserted row, or ``None`` when nothing was
        inserted (constraint violation or any other database error).
    """
    sql = """INSERT INTO sites(unixtime,hash,tafel,description,title)
             VALUES(?,?,?,?,?)"""
    try:
        cur = conn.cursor()
        cur.execute(sql, site)
        conn.commit()
        return cur.lastrowid
    except sqlite3.IntegrityError:
        # ``hash`` is the table's primary key, so re-scraping a page whose
        # text has not changed lands here. That is the normal "nothing new"
        # case and is deliberately not reported.
        conn.rollback()
        return None
    except Error as e:
        # Anything else (missing table, locked database, ...) is a real
        # problem. Report it on stderr so the RSS document written to stdout
        # is not polluted.
        print(e, file=sys.stderr)
        conn.rollback()
        return None
|
||||||
|
|
||||||
def get_sites(conn, limit=3):
    """Return the most recently stored pages, newest first.

    Args:
        conn: Open database connection; ``cursor()`` is called on it.
        limit: Maximum number of rows to return. Defaults to 3.

    Returns:
        A list of ``(description, title)`` tuples ordered by ``unixtime``
        descending. An empty list is returned when the query fails, so
        callers can always iterate over the result.
    """
    sql = """SELECT description,title FROM sites
             ORDER BY unixtime DESC LIMIT ?"""
    try:
        cur = conn.cursor()
        cur.execute(sql, (limit,))
        return cur.fetchall()
    except Error as e:
        # Report on stderr: stdout carries the generated RSS document.
        print(e, file=sys.stderr)
        return []
|
||||||
|
|
||||||
def store_site(conn, tafel):
    """Download one ARD teletext page and store its text via ``insert_site``.

    The page is fetched from ``http://www.ard-text.de/mobil/<tafel>``. The
    text of the first ``<div class="std">`` becomes the description and the
    text of the first ``<h1>`` becomes the title (``"N/A"`` when there is no
    ``<h1>``). Pages that cannot be fetched, or that have no ``div.std``,
    are skipped.

    Args:
        conn: Open database connection, passed through to ``insert_site``.
        tafel: Teletext page number; appended to the base URL.
    """
    link = "http://www.ard-text.de/mobil/" + str(tafel)
    http = urllib3.PoolManager()
    try:
        # A timeout keeps one unresponsive page from hanging the whole run.
        r = http.request('GET', link, timeout=10.0)
    except urllib3.exceptions.HTTPError as e:
        # Best effort: report and move on to the next page.
        print(e, file=sys.stderr)
        return
    if r.status != 200:
        # Do not store the body of an error response as page content.
        print("Skipping page %s: HTTP %s" % (tafel, r.status), file=sys.stderr)
        return

    soup = BeautifulSoup(r.data, 'html.parser')
    desc = soup.find('div', class_='std')
    if desc is None:
        return

    title_tag = soup.find('h1')
    # ``.text`` already yields the text without markup, so no manual
    # stripping of <h1>/<b> tags is needed.
    title = title_tag.text if title_tag is not None else "N/A"

    unixtime = time.time()
    desc = desc.text
    # The MD5 digest is stored in the ``hash`` column; it serves as a
    # content fingerprint, not as a security measure.
    desc_hash = hashlib.md5(desc.encode('utf-8')).hexdigest()
    insert_site(conn, (unixtime, desc_hash, tafel, desc, title))
|
||||||
|
|
||||||
def gen_rss(rows):
    """Render *rows* as an RSS 2.0 document and print it to stdout.

    Args:
        rows: Iterable of sequences whose first element is the item
            description and whose second element is the item title.
            ``None`` is treated like an empty iterable. Rows whose
            description is ``None`` are skipped; a ``None`` title is
            rendered as ``"N/A"``.

    Returns:
        The generated XML as a string (the same text that was printed).
    """
    parts = [
        '<?xml version="1.0" encoding="UTF-8" ?>\n'
        '<rss version="2.0">\n'
        '\n'
        '<channel>\n'
        '  <title>ARD Teletext RSS Feed (inofficial)</title>\n'
        '  <link>https://www.exitnode.net</link>\n'
        '  <description>bla</description>'
    ]

    for r in rows or ():
        cont = r[0]
        title = r[1]
        if cont is None:
            continue
        # Scraped text may contain '&', '<' or '>'; without escaping, the
        # output would not be well-formed XML.
        cont = escape(cont.replace("\n", ""))
        title = escape(title if title is not None else "N/A")
        parts.append(
            "\n"
            "  <item>\n"
            "    <title>" + title + "</title>\n"
            "    <description>\n"
            + cont + "\n"
            "    </description>\n"
            "  </item>"
        )

    parts.append("\n</channel>\n</rss>")

    out = "".join(parts)
    print(out)
    return out
|
||||||
|
|
||||||
def main(db=r"/home/micha/bla.db", pages=range(104, 116)):
    """Scrape the configured teletext pages and print the RSS feed.

    Opens the database, makes sure the ``sites`` table exists, stores every
    page in *pages*, then prints a feed built from the rows returned by
    ``get_sites``.

    Args:
        db: Path of the SQLite database file.
        pages: Iterable of teletext page numbers to fetch.
    """
    sql_create_sites_table = """CREATE TABLE IF NOT EXISTS sites (
                                    unixtime int NOT NULL,
                                    hash text PRIMARY KEY,
                                    tafel int,
                                    description text,
                                    title text
                                ); """

    conn = create_conn(db)
    if conn is None:
        # stdout is reserved for the feed, so diagnostics go to stderr.
        print("Error: No db conn", file=sys.stderr)
        return

    try:
        create_table(conn, sql_create_sites_table)
        for s in pages:
            store_site(conn, s)

        # Guard against a failed query yielding None instead of a list.
        rows = get_sites(conn) or []
        gen_rss(rows)
    finally:
        # NOTE(review): assumes create_conn returns a connection object with
        # close(); its definition is not visible here - confirm.
        conn.close()


if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user