From 6c8aca866a8951ddc703e5d9e09db1341a886eb6 Mon Sep 17 00:00:00 2001 From: Michael Clemens Date: Thu, 13 Aug 2020 00:44:45 +0200 Subject: [PATCH] lots of improvements (XML output, database layout etc) --- teletext.py | 93 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 40 deletions(-) diff --git a/teletext.py b/teletext.py index 2f35e58..285e5e7 100644 --- a/teletext.py +++ b/teletext.py @@ -1,8 +1,8 @@ import urllib3 from bs4 import BeautifulSoup -import textwrap import hashlib import sqlite3 +import time from sqlite3 import Error def create_conn(db_file): @@ -24,86 +24,99 @@ def create_table(conn, create_table_SQL): print(e) def insert_site(conn, site): - - sql = ''' INSERT INTO sites(hash,site,content) - VALUES(?,?,?) ''' + + sql = ''' INSERT INTO sites(unixtime,hash,tafel,description,title) + VALUES(?,?,?,?,?) ''' try: c = conn.cursor() c.execute(sql, site) conn.commit() return c.lastrowid except Error as e: - print(e) + err = e -def get_site(conn, site): - #sql = ''' SELECT content from sites WHERE site = ? ''' - sql = ''' SELECT content from sites ''' +def get_sites(conn): + sql = ''' SELECT description,title from sites order by unixtime desc limit 3 ''' try: c = conn.cursor() - #c.execute(sql, (site,)) c.execute(sql) rows = c.fetchall() return rows except Error as e: print(e) -def store_site(conn, site): - link = "http://www.ard-text.de/mobil/"+str(site) +def store_site(conn, tafel): + link = "http://www.ard-text.de/mobil/"+str(tafel) http = urllib3.PoolManager() r = http.request('GET', link) soup = BeautifulSoup(r.data, 'html.parser') - bla = soup.find('div', class_='std').text - bla_hash = hashlib.md5(bla.encode('utf-8')).hexdigest() - content = (bla_hash,site,bla) - insert_site(conn,content) + desc = soup.find('div', class_='std') + title = soup.find('h1') + if desc is not None: + if title is not None: + title = title.text.replace("

","") + title = title.replace("","") + title = title.replace("

","") + title = title.replace("","") + else: + title = "N/A" + unixtime = time.time() + desc = desc.text + desc_hash = hashlib.md5(desc.encode('utf-8')).hexdigest() + content = (unixtime,desc_hash,tafel,desc,title) + insert_site(conn,content) -def gen_rss(): - out = """ - - +def gen_rss(rows): + out = """ + - W3Schools Home Page - https://www.w3schools.com - Free web building tutorials""" + ARD Teletext RSS Feed (inofficial) + https://www.exitnode.net + bla""" - #bla = """ - # RSS Tutorial - # https://www.w3schools.com/xml/xml_rss.asp - # New RSS tutorial on W3Schools - # """ + for r in rows: + cont = r[0] + title = r[1] - out += """ + if cont is not None: + cont = cont.replace("\n","") + out+= """ + + """ + title + """ + + """ + cont + """ + + """ + + out += """ + """ + print(out) def main(): db = r"/home/micha/bla.db" sql_create_sites_table = """CREATE TABLE IF NOT EXISTS sites ( + unixtime int NOT NULL, hash text PRIMARY KEY, - site int, - content text + tafel int, + description text, + title text ); """ - #conn = create_conn(db) - conn = None + conn = create_conn(db) if conn is not None: create_table(conn, sql_create_sites_table) for s in range(104, 116): store_site(conn,s) - rows = get_site(conn,"104") - for row in rows: - #r = row[0].replace("^ ", "") - #print(row[0]) - - print(textwrap.fill(r, 40)) + rows = get_sites(conn) + gen_rss(rows) else: print("Error: No db conn") - gen_rss() - if __name__ == "__main__": main()