Files
py-flask-imageboard/scripts/populate_db.py
2026-01-20 21:40:46 +00:00

197 lines
9.1 KiB
Python

"""
Script to populate the database with realistic sample conversations
"""
import os
import sys
from datetime import datetime, timedelta
import random
from faker import Faker
# Add the project root to the path so we can import modules.
# Two dirname() calls climb from this file to its directory and then to that
# directory's parent, which is treated as the project root.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Inserted at position 0 so the project's own packages are found first.
# This must run before the project imports below.
sys.path.insert(0, project_root)
from core.app import create_app
from database.models import db, Board, Thread, Post
# Shared Faker instance used by create_sample_data() for generated thread
# subjects, filler text and IPv4 addresses.
fake = Faker()
# (name, title, description) for every sample board.
_SAMPLE_BOARDS = (
    ('general', 'General Discussion', 'Talk about anything'),
    ('tech', 'Technology', 'Tech discussions'),
    ('random', 'Random', 'Random thoughts'),
    ('gaming', 'Gaming', 'Video game discussions'),
    ('programming', 'Programming', 'Code and programming discussions'),
)

# Scripted conversations keyed by board name. Each conversation becomes one
# thread; each entry is (author_name, content) or
# (author_name, content, tripcode), listed in the order the replies were made.
_SAMPLE_CONVERSATIONS = {
    'general': [
        [
            ("Anonymous", "Hey everyone, what's everyone up to today?"),
            ("TripUser", "Just finished my morning coffee and reading news", "!!tripcode1"),
            ("Anonymous", "Same here. Anyone tried that new restaurant downtown?"),
            ("Foodie", "Oh man, the pasta there is amazing!", "!!foodtrip"),
            ("Anonymous", "Thanks for the recommendation!"),
        ],
        [
            ("MorningPerson", "Early bird here, anyone else up?"),
            ("NightOwl", "Why are you awake so early???", "!!night"),
            ("Anonymous", "LOL, I'm somewhere in between"),
            ("MorningPerson", "Some of us have responsibilities :)", "!!early"),
            ("NightOwl", "Fair enough, enjoy your productivity", "!!night"),
        ],
    ],
    'tech': [
        [
            ("TechEnthusiast", "Just got my hands on the new GPU, benchmarks are impressive!"),
            ("BudgetGamer", "How much did it cost though? Asking for a friend...", "!!budget"),
            ("TechEnthusiast", "Worth every penny if you're into gaming/rendering", "!!tech"),
            ("Developer", "What about power consumption? That's usually the concern", "!!dev"),
            ("TechEnthusiast", "TDP is reasonable, around 250W under load", "!!tech"),
        ],
        [
            ("LinuxUser", "Switched to Linux full time, no regrets so far"),
            ("WindowsFan", "But what about gaming compatibility?", "!!windows"),
            ("LinuxUser", "Proton and Lutris handle most games pretty well now", "!!linux"),
            ("MacUser", "Have you tried the new M3 Macs? Performance is great", "!!mac"),
            ("LinuxUser", "Interesting, but I prefer the customization options", "!!linux"),
        ],
    ],
    'random': [
        [
            ("Philosopher", "If a tree falls in a forest and no one is around, does it make a sound?"),
            ("Scientist", "Yes, it creates vibrations in the air regardless of observation", "!!science"),
            ("Artist", "But does it matter if no one experiences it?", "!!art"),
            ("Philosopher", "That's the real question, isn't it?", "!!phil"),
            ("Random", "I think we're overthinking this", "!!overthinker"),
        ],
    ],
    'gaming': [
        [
            ("RPGFan", "Just finished Elden Ring, absolutely incredible world design!"),
            ("CasualGamer", "How many hours did it take you?", "!!casual"),
            ("RPGFan", "Around 87 hours, and I didn't even complete everything", "!!rpg"),
            ("SpeedRunner", "I beat it in 25 hours, but missed most content", "!!speed"),
            ("RPGFan", "Different playstyles, both valid approaches", "!!rpg"),
        ],
    ],
    'programming': [
        [
            ("PythonDev", "Working on a new web scraping project, Python is so versatile"),
            ("JavaDev", "Still prefer Java for enterprise applications though", "!!java"),
            ("PythonDev", "Each has its place, Python just feels more intuitive", "!!python"),
            ("JSDev", "Don't forget about JavaScript for backend too!", "!!js"),
            ("PythonDev", "True, Node.js has its merits", "!!python"),
            ("GoDev", "Have you considered Go for performance-critical tasks?", "!!go"),
            ("PythonDev", "Maybe for certain use cases, depends on requirements", "!!python"),
        ],
    ],
}


def _chronological_timestamps(now, count, max_age_minutes):
    """Return *count* random datetimes within the last *max_age_minutes*, oldest first."""
    # Sorting the offsets largest-first keeps scripted replies in the order
    # they were written instead of giving every post an unrelated timestamp.
    offsets = sorted(
        (random.randint(0, max_age_minutes) for _ in range(count)),
        reverse=True,
    )
    return [now - timedelta(minutes=offset) for offset in offsets]


def _pad_content(content):
    """Return *content*, occasionally extended with filler to exercise long-post layout."""
    # 25% chance of a long post
    if random.random() < 0.25:
        content += " " + fake.paragraph(nb_sentences=random.randint(5, 15))
    # 10% chance of an extremely long run of words to test wrapping
    if random.random() < 0.10:
        content += " " + " ".join(
            fake.word() for _ in range(random.randint(50, 200))
        )
    return content


def _wall_of_text():
    """Return many paragraphs followed by one unbroken, very long token."""
    paragraphs = "".join(
        fake.paragraph(nb_sentences=random.randint(3, 8)) + "\n\n"
        for _ in range(random.randint(10, 30))
    )
    # A single token with no spaces, to test word breaking
    super_long_line = "supercalifragilisticexpialidocious" * 50
    return paragraphs + "\n" + super_long_line + "\n"


def _add_thread(board, posts, now, max_age_minutes):
    """Add one thread with *posts* to the session and return the thread.

    *posts* is a sequence of (author_name, content) or
    (author_name, content, tripcode) tuples in reply order.
    """
    timestamps = _chronological_timestamps(now, len(posts), max_age_minutes)
    thread = Thread(
        subject=fake.sentence(nb_words=random.randint(3, 8)).rstrip('.'),
        board=board,
        # Bump time is the time of the latest post, not the time the script ran.
        bumped_at=timestamps[-1] if timestamps else now,
    )
    db.session.add(thread)
    db.session.flush()  # Get the ID without committing

    for post_data, created_at in zip(posts, timestamps):
        author_name, content, *rest = post_data
        db.session.add(Post(
            content=content,
            author_name=author_name,
            tripcode=rest[0] if rest else None,
            ip_address=fake.ipv4(),
            thread=thread,
            created_at=created_at,
        ))
    return thread


def create_sample_data():
    """Replace all boards, threads and posts with generated sample data.

    Existing Post, Thread and Board rows are deleted first. The wipe and the
    new rows are committed together in a single transaction; if anything
    fails the session is rolled back and the exception is re-raised, so the
    database is never left wiped or half populated.

    Within each thread, post timestamps follow reply order and the thread's
    ``bumped_at`` equals the timestamp of its latest post.
    """
    app = create_app()
    with app.app_context():
        # One reference time so every generated timestamp is relative to the
        # same instant.
        now = datetime.utcnow()
        try:
            # Clear existing data (posts, then threads, then boards)
            Post.query.delete()
            Thread.query.delete()
            Board.query.delete()

            # Create sample boards, keeping them by name so they do not need
            # to be queried back.
            boards = {}
            for name, title, description in _SAMPLE_BOARDS:
                board = Board(name=name, title=title, description=description)
                db.session.add(board)
                boards[name] = board
            db.session.flush()

            # Create threads and posts from the scripted conversations
            for board_name, board_conversations in _SAMPLE_CONVERSATIONS.items():
                for conversation in board_conversations:
                    padded = [
                        (post_data[0], _pad_content(post_data[1]), *post_data[2:])
                        for post_data in conversation
                    ]
                    _add_thread(boards[board_name], padded, now, 1000)

            # Also add some threads with very long single posts to test the wrapping
            for _ in range(3):
                _add_thread(
                    boards['general'],
                    [("LongWriter", _wall_of_text(), "!!verbose")],
                    now,
                    500,
                )

            db.session.commit()
        except Exception:
            db.session.rollback()
            raise

        print("Database populated with sample conversations!")
        print(f"Created {Board.query.count()} boards")
        print(f"Created {Thread.query.count()} threads")
        print(f"Created {Post.query.count()} posts")
if __name__ == "__main__":
    # faker is imported at module level, so a missing install already fails
    # there with ImportError before this guard is reached; an install-on-demand
    # fallback here could never run. Install dependencies up front instead
    # (e.g. `pip install faker`).
    create_sample_data()